**TASK 2: PREDICTIVE ANALYSIS USING MACHINE LEARNING**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

# Read the medal records from CSV (decoded as ISO-8859-1) and keep only the
# rows in which every column has a value.
file_path = '/content/Summer-Olympic-medals-1976-to-2008.csv'  # Replace with your file path
data = pd.read_csv(file_path, encoding='ISO-8859-1')

# Discard any row that contains at least one missing value.
data = data.dropna(axis=0, how='any')

# Encode categorical variables as integer codes.
# Each column gets its own fitted LabelEncoder, stored in `encoders`, so the
# integer codes can be mapped back to the original labels later via
# `encoders[column].inverse_transform(...)`. Refitting one shared encoder on
# every column would discard the mapping of all columns except the last.
categorical_columns = ['Gender', 'Country', 'Event', 'Sport', 'Medal']  # 'Medal' is the target variable
encoders = {}
for column in categorical_columns:
    encoders[column] = LabelEncoder()
    data[column] = encoders[column].fit_transform(data[column])

# Keep `encoder` bound to the fitted Medal (target) encoder, matching the
# state this name had when a single encoder was refit column by column.
encoder = encoders['Medal']

# Define features (X) and target (y)
features = ['Year', 'Gender', 'Country', 'Event', 'Sport']  # Adjust based on available columns
X = data[features]
y = data['Medal']

# Split data into training (80%) and testing (20%) sets.
# `stratify=y` keeps the proportion of each medal class the same in both
# splits, so the test metrics are not skewed by an unlucky random draw.
# `random_state` fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build a Random Forest classifier with a fixed seed and fit it on the
# training split (`fit` returns the fitted estimator itself).
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict the encoded medal class for every row of the test split.
y_pred = model.predict(X_test)

# Evaluate the model
# `encoder` was last fitted on the Medal column, so `encoder.classes_` holds
# the original medal names ordered by their integer code. Passing them as
# `target_names` makes the report show medal names instead of opaque codes;
# `labels` is pinned to the full code range so names and rows always align,
# even if some class happens to be absent from the test split.
medal_names = [str(name) for name in encoder.classes_]
print("Classification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        labels=list(range(len(medal_names))),
        target_names=medal_names,
    )
)
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))

# Show the importance score the fitted forest assigns to each input column,
# in the same order as `features`.
importance = model.feature_importances_
for feature_name, score in zip(features, importance):
    print(f"Feature: {feature_name}, Importance: {score:.2f}")