In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Load the raw training and test tables from the local Downloads folder.
train_df = pd.read_csv('/home/nagu/Downloads/train.csv')
test_df = pd.read_csv('/home/nagu/Downloads/test.csv')



# Preprocessing
def preprocess_data(df):
    """Drop the 'Roll no' column and label-encode every object-dtype column.

    Args:
        df: Input frame; must contain a 'Roll no' column.

    Returns:
        A ``(features, encoders)`` tuple: a new frame without 'Roll no' whose
        object-dtype columns are replaced by integer codes, and a dict that
        maps each encoded column name to the ``LabelEncoder`` fitted on it.
    """
    # 'Roll no' is not used as a feature.
    features = df.drop(columns='Roll no')

    # Fit one encoder per object-dtype column and keep it so the same
    # mapping can be reused later.
    encoders = {}
    categorical_columns = features.select_dtypes(include=['object']).columns
    for name in categorical_columns:
        encoder = LabelEncoder()
        features[name] = encoder.fit_transform(features[name])
        encoders[name] = encoder

    return features, encoders

# Encode the training frame and keep the fitted encoders for the test frame.
train_df, train_label_encoders = preprocess_data(df=train_df)

def preprocess_test_data(df, label_encoders):
    """Apply already-fitted label encoders to the test frame.

    Args:
        df: Raw test frame; must contain a 'Roll no' column and every
            column named in ``label_encoders``.
        label_encoders: Mapping of column name to an encoder that was
            fitted beforehand (it must expose ``classes_`` and
            ``transform``).

    Returns:
        A ``(features, roll_no)`` tuple: a new frame without 'Roll no' whose
        listed columns are replaced by integer codes, and the original
        'Roll no' column. Labels that the encoder has never seen are
        encoded as -1.
    """
    roll_no = df['Roll no']
    df = df.drop('Roll no', axis=1)
    for column, le in label_encoders.items():
        # transform() raises ValueError on labels it was not fitted on, so
        # only known labels are passed through it; everything else keeps
        # the sentinel code -1.
        known = df[column].isin(le.classes_).to_numpy()
        encoded = pd.Series(-1, index=df.index, dtype='int64')
        if known.any():
            encoded.iloc[known] = le.transform(df.loc[known, column])
        # Assign positionally, exactly like the raw array transform() returns.
        df[column] = encoded.to_numpy()
    return df, roll_no

# Encode the test frame with the encoders fitted on the training frame
test_df, roll_no_test = preprocess_test_data(test_df, train_label_encoders)

# Split the training data into features and the 'exam score' target
X = train_df.drop('exam score', axis=1)
y = train_df['exam score']

# Hold out 20% of the rows for validation; the seed keeps the split repeatable
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features: fit on the training split only, then reuse the same
# statistics for the validation and test data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
test_df_scaled = scaler.transform(test_df)

# Train a RandomForest regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Validate the model (RMSE is the square root of the mean squared error)
y_val_pred = model.predict(X_val_scaled)
val_mse = mean_squared_error(y_val, y_val_pred)
val_rmse = val_mse ** 0.5

print(f'Validation RMSE: {val_rmse}')

# Predict on test data
test_predictions = model.predict(test_df_scaled)

# Convert predictions to integers
test_predictions_int = test_predictions.round().astype(int)

# Save the predictions; the path is held in one variable so the message
# below always reports the location the file was actually written to
output_path = '/home/nagu/Downloads/test_predictions.csv'
test_results = pd.DataFrame({'Roll no': roll_no_test, 'Predicted exam score': test_predictions_int})
test_results.to_csv(output_path, index=False)
print(f'Predictions saved to {output_path}')

Validation RMSE: 15.413445961914784
Predictions saved to /mnt/data/test_predictions.csv
