In [45]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Load the data
# NOTE(review): absolute, machine-specific path -- this cell only runs where the
# file exists at exactly this location; consider a configurable data directory.
file_path = 'C:/Users/sneha/OneDrive/Desktop/college/intern/student_cleaned.csv'
data = pd.read_csv(file_path)

# Columns that are integer-encoded below.
# The trailing space in 'Family_size ' is part of the column name used throughout.
categorical_columns = ['Gender', 'Income', 'Family_size ', 'Objective', 'Study_pattern', 
                       'Time_spent_pattern', 'Extra_curricular_activity', 'Paused_resumed']

# Select relevant features for prediction
features = ['Tenth_marks', 'Eleventh_marks', 'Gender', 'Income', 'Family_size ', 
            'Objective', 'Study_pattern', 'Time_spent_pattern', 
            'Extra_curricular_activity', 'Paused_resumed']
target = 'Twelfth_marks'

# Coerce the non-categorical feature columns to numeric; entries that cannot be
# parsed become NaN so that the dropna below removes them.
numeric_columns = [column for column in features if column not in categorical_columns]
data[numeric_columns] = data[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Drop incomplete rows BEFORE encoding. Once a categorical column has been
# label-encoded every value is an integer code, so a missing category could no
# longer be detected (and, depending on the scikit-learn version, a NaN mixed
# with strings is either encoded as its own class or makes the encoder raise).
# .copy() gives an independent frame so the column assignments below are unambiguous.
data = data.dropna(subset=features + [target]).copy()

# Convert categorical variables to numeric using LabelEncoder.
# One encoder per column is kept in `label_encoders` so each mapping can be
# inspected or inverted later; after the loop `label_encoder` still refers to
# the encoder fitted on the last column, as it did with a single shared encoder.
label_encoders = {}
for column in categorical_columns:
    label_encoder = LabelEncoder()
    data[column] = label_encoder.fit_transform(data[column])
    label_encoders[column] = label_encoder

X = data[features]
y = data[target]

# Split the data into training and testing sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=45)

In [46]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

# Train a Decision Tree Regressor.
# random_state is pinned: scikit-learn randomly permutes the candidate features at
# each split, so an unseeded tree can produce different metrics on every run.
dt_regressor = DecisionTreeRegressor(random_state=42)
dt_regressor.fit(X_train, y_train)
y_pred_dt = dt_regressor.predict(X_test)

# Evaluate the model on the held-out test split
mae_dt = mean_absolute_error(y_test, y_pred_dt)
mse_dt = mean_squared_error(y_test, y_pred_dt)
r2_dt = r2_score(y_test, y_pred_dt)

print(f'Decision Tree Regressor - \nMAE: {mae_dt:.4f}\nMSE: {mse_dt:.4f}\nR2 Score: {r2_dt:.4f}')

Decision Tree Regressor - 
MAE: 2.6412
MSE: 10.8788
R2 Score: 0.4703


In [47]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Fit the linear regression model on the training split and predict the test split
lr_regressor = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr_regressor.predict(X_test)

# Score the test predictions with each metric, in the order MAE, MSE, R2
mae_lr, mse_lr, r2_lr = (
    metric(y_test, y_pred_lr)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print(f'Linear Regressor - \nMAE: {mae_lr:.4f}\nMSE: {mse_lr:.4f}\nR2 Score: {r2_lr:.4f}')

Linear Regressor - 
MAE: 1.8528
MSE: 5.9174
R2 Score: 0.7119


In [48]:
# Import the Random Forest Regressor model
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Build a 100-tree Random Forest with a fixed seed and fit it on the training split
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the target for the test split
y_pred = rf_model.predict(X_test)

# Score the test predictions with each metric, in the order MAE, MSE, R2
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

# Report the three scores, one per line, under the model heading
print(
    'Random Forest Regressor -',
    f'MAE: {mae:.4f}',
    f'MSE: {mse:.4f}',
    f'R2 Score: {r2:.4f}',
    sep='\n',
)

Random Forest Regressor -
MAE: 1.2861
MSE: 3.4446
R2 Score: 0.8323


In [49]:
# Import the K-Nearest Neighbors Regressor model
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score

# Build a 5-neighbour KNN regressor and fit it on the training split
knn_model = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)

# Predict the target for the test split
y_pred = knn_model.predict(X_test)

# Score the test predictions with each metric, in the order MAE, MSE, R2
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

# Report the three scores, one per line, under the model heading
print(
    'K-Nearest Neighbors Regressor -',
    f'MAE: {mae:.4f}',
    f'MSE: {mse:.4f}',
    f'R2 Score: {r2:.4f}',
    sep='\n',
)

K-Nearest Neighbors Regressor -
MAE: 1.4069
MSE: 4.9894
R2 Score: 0.7570
