# Student Performance

## Importing the dataset

In [None]:
# importing necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import plotly.express as px

# Pandas: show every column and every row, on lines up to 1000 characters wide
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.width = 1000

# Matplotlib defaults: 10x6 figures with the grid enabled
plt.rcParams.update({'figure.figsize': [10, 6], 'axes.grid': True})

# Seaborn "whitegrid" theme
sns.set(style="whitegrid")

# Numpy: print arrays in full instead of summarising them
np.set_printoptions(threshold=np.inf)


## **Dataset Description**:
This dataset consists of information related to students' academic performance, including various personal and academic attributes. It contains 10 rows (data points) and 9 columns (features), as summarized below:

1. **StudentID** (Numeric): A unique identifier for each student. It helps distinguish between individual students in the dataset.
   
2. **Name** (Categorical/String): The name of the student. This attribute might not be directly useful for modeling, but it serves as an identifier for each record.

3. **Gender** (Categorical/String): The gender of the student (e.g., "Male" or "Female"). This feature could be used to examine any potential gender-related trends in the dataset.

4. **AttendanceRate** (Numeric): The percentage of classes attended by the student. A higher attendance rate may correlate with better academic performance.

5. **StudyHoursPerWeek** (Numeric): The number of hours the student spends studying per week. This feature is expected to have a significant impact on the student's final grade.

6. **PreviousGrade** (Numeric): The grade achieved by the student in their previous course(s). This feature could serve as an indicator of academic ability or prior performance.

7. **ExtracurricularActivities** (Categorical/String): A binary or categorical indicator of whether the student participates in extracurricular activities. This could represent involvement in clubs, sports, or other non-academic pursuits, which may affect their academic performance.

8. **ParentalSupport** (Categorical/String): A categorical feature indicating whether the student receives parental support (e.g., "Yes" or "No"). Parental support may have a positive effect on a student's academic performance.

9. **FinalGrade** (Numeric): The target variable representing the student's final grade in their course or academic program. This is the variable that will be predicted in the machine learning model.


In [None]:
# Load the student records from the CSV file next to the notebook
df = pd.read_csv(filepath_or_buffer="student_performance.csv")

# Preview the first five rows
df.head(n=5)

In [None]:
# Display the whole dataframe (the display options above disable row/column truncation)
df

In [None]:
# Count rows that are exact duplicates of an earlier row
duplicate_row_count = df.duplicated().sum()
print(duplicate_row_count)

In [None]:
# Report the dataframe dimensions as a (rows, columns) tuple
print("\nShape of the dataset (number of rows and columns):", df.shape, sep="\n")

In [None]:
# Number of missing entries in each column
missing_per_column = df.isna().sum()
print(missing_per_column)

## **Observations**:
### Shape of the Dataset:
- **Number of Rows (Data points)**: 10
- **Number of Columns (Features)**: 9

### Missing Values:
The dataset does not have any missing values, as indicated by the count of 0 missing values across all features.

### Target Variable:
- The target variable is **FinalGrade**, a numeric column that the models will predict from the other features.

In [None]:
# Summarise column names, non-null counts and dtypes
df.info()

In [None]:
# Keep a handle on the column index of the freshly loaded dataframe
columns = df.columns

In [None]:
# Display the column index captured in the previous cell
columns

In [None]:
# For every column, list its distinct values and how many of them there are
for column_name in df.columns:
    distinct = df[column_name].unique()
    print(f"{column_name}: {distinct} (Count: {len(distinct)})")

In [None]:
# Summary statistics for the numeric columns
df.describe()

In [None]:
# Preview the first rows again before plotting
df.head()

In [None]:
# Pie chart of FinalGrade values grouped by Gender; slices are labelled with
# their value and outlined in black.
gender_palette = ['mediumturquoise', 'lightgreen']
slice_outline = dict(color='#000000', width=2)

fig = px.pie(df, names='Gender', values='FinalGrade')
fig.update_traces(
    marker=dict(colors=gender_palette, line=slice_outline),
    textinfo='value',
    textfont_size=20,
    hoverinfo='label+percent',
)
fig.show()


In [None]:
# Horizontal bars: FinalGrade on x, AttendanceRate on y, coloured by Gender
axis_labels = {"AttendanceRate": "Attendance Rate", "FinalGrade": "Final Grade"}
fig = px.bar(
    df,
    x="FinalGrade",
    y="AttendanceRate",
    color="Gender",
    orientation='h',
    title="Attendance Rate vs Final Grade by Gender",
    labels=axis_labels,
)

# Print each bar's x value on the bar itself
fig.update_traces(textposition='auto', texttemplate='%{x}')

# Request ascending-total ordering on the y axis.
# NOTE(review): categoryorder applies to categorical axes; if AttendanceRate is
# numeric this setting may have no visible effect — confirm.
fig.update_layout(yaxis=dict(categoryorder='total ascending'))

fig.show()

In [None]:
# Histogram over AttendanceRate with FinalGrade as the aggregated y value,
# split by Gender, with a marginal box plot to show the distribution.
axis_labels = {"AttendanceRate": "Attendance Rate", "FinalGrade": "Final Grade"}
fig = px.histogram(
    df,
    x="AttendanceRate",
    y="FinalGrade",
    color="Gender",
    marginal="box",
    title="Distribution of Final Grades by Attendance Rate and Gender",
    labels=axis_labels,
)

# Leave a gap between bars for readability
fig.update_layout(bargap=0.2)
fig.show()

In [None]:
# Bars of FinalGrade against weekly study hours, coloured by extracurricular participation
axis_labels = {"StudyHoursPerWeek": "Study Hours per Week", "FinalGrade": "Final Grade"}
fig = px.bar(
    df,
    x="StudyHoursPerWeek",
    y="FinalGrade",
    color="ExtracurricularActivities",
    title="Study Hours per Week vs Final Grade by Extracurricular Activities",
    labels=axis_labels,
)
fig.show()

In [None]:
# Scatter of previous grade against final grade, one colour per gender
axis_labels = {"PreviousGrade": "Previous Grade", "FinalGrade": "Final Grade"}
fig = px.scatter(
    df,
    x="PreviousGrade",
    y="FinalGrade",
    color="Gender",
    title="Previous Grade vs Final Grade by Gender",
    labels=axis_labels,
)
fig.show()

In [None]:
# List the available columns before the next set of plots
df.columns

In [None]:
# Histogram of FinalGrade in ten bins; bars take 80% of each bin's width
plt.hist(df['FinalGrade'], bins=10, rwidth=0.8)

# Label the axes and title the figure
plt.xlabel('FinalGrade')
plt.ylabel('Frequency')
plt.title('FinalGrade Histogram')
plt.show()

In [None]:
# Bubble chart: study hours vs final grade, coloured by gender, with marker
# size driven by the previous grade.
axis_labels = {
    "StudyHoursPerWeek": "Study Hours per Week",
    "FinalGrade": "Final Grade",
    "PreviousGrade": "Previous Grade",
}
fig = px.scatter(
    df,
    x="StudyHoursPerWeek",
    y="FinalGrade",
    color="Gender",
    size="PreviousGrade",
    title="Study Hours per Week vs Final Grade, Colored by Gender and Sized by Previous Grade",
    labels=axis_labels,
)
fig.show()


In [None]:
# Columns to draw a box plot for
numeric_columns = ["StudentID", "AttendanceRate", "StudyHoursPerWeek", "PreviousGrade", "FinalGrade"]

# One stand-alone box plot per listed column
for numeric_column in numeric_columns:
    fig = px.box(df, title=f"Distribution of {numeric_column}", y=numeric_column)
    fig.show()


In [None]:
# Drop the identifier columns 'StudentID' and 'Name' before modelling.
# errors='ignore' keeps this cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise KeyError.
df = df.drop(columns=['StudentID', 'Name'], errors='ignore')

In [None]:
# Display the dataframe after the identifier columns were removed
df

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the Gender and ParentalSupport labels with integer codes.
# Each column gets its own encoder object, kept under its own name.
le_gender, le_support = LabelEncoder(), LabelEncoder()

for encoded_column, encoder in (('Gender', le_gender), ('ParentalSupport', le_support)):
    df[encoded_column] = encoder.fit_transform(df[encoded_column])

# Show the dataframe with the encoded columns
df


In [None]:
# Pairwise correlations between all columns, drawn as an annotated heatmap
correlation_matrix = df.corr()
sns.heatmap(data=correlation_matrix, fmt='.2f', annot=True)
plt.title("Correlation Matrix")
plt.show()

In [None]:
# Grid of pairwise scatter plots across the dataframe's columns
sns.pairplot(data=df)
plt.show()

In [None]:
# One 20-bin histogram per column on a 10x10 canvas
df.hist(figsize=(10, 10), bins=20)
plt.show()

In [None]:
# Columns remaining after dropping identifiers and encoding labels
df.columns

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every listed column to the 0-1 range, overwriting the values in df.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# and the list includes the target FinalGrade, so test rows influence the
# scaling and later metrics are in scaled units — confirm this is intended.
numeric_features = [
    'Gender',
    'AttendanceRate',
    'StudyHoursPerWeek',
    'PreviousGrade',
    'ExtracurricularActivities',
    'ParentalSupport',
    'FinalGrade',
]
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df[numeric_features])
df[numeric_features] = scaled_values

# Show the rescaled dataframe
df

In [None]:
# Dimensions of the dataframe that feeds the train/test split
df.shape

In [None]:
from sklearn.model_selection import train_test_split

# The target is FinalGrade; every other column is a feature
y = df['FinalGrade']
X = df.drop(columns=['FinalGrade'])

# Hold out 20% of the rows for testing, with a fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [None]:
# Report the dimensions of each piece of the split
for split_name, split_part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"Shape of {split_name}", split_part.shape)

In [None]:
# Feature columns the models will be trained on
X_train.columns

## Training a Model

### Simple Linear Regression model

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit a linear regression on the training split
model = LinearRegression().fit(X_train, y_train)

# Display the fitted estimator as the cell output
model


In [None]:

# Predict the held-out rows with the linear model
y_pred_lr = model.predict(X_test)

# Score the predictions: mean squared error and coefficient of determination
mse = mean_squared_error(y_true=y_test, y_pred=y_pred_lr)
r2 = r2_score(y_true=y_test, y_pred=y_pred_lr)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Fit a seeded decision tree on the training split
tree_model = DecisionTreeRegressor(random_state=42)
tree_model.fit(X_train, y_train)

# Predict the held-out rows and score them
y_pred_tree = tree_model.predict(X_test)
mse_tree = mean_squared_error(y_true=y_test, y_pred=y_pred_tree)
r2_tree = r2_score(y_true=y_test, y_pred=y_pred_tree)

print(f"Decision Tree Regressor - Mean Squared Error: {mse_tree}")
print(f"Decision Tree Regressor - R-squared: {r2_tree}")


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Fit a seeded 100-tree random forest on the training split
forest_model = RandomForestRegressor(random_state=42, n_estimators=100)
forest_model.fit(X_train, y_train)

# Predict the held-out rows and score them
y_pred_forest = forest_model.predict(X_test)
mse_forest = mean_squared_error(y_true=y_test, y_pred=y_pred_forest)
r2_forest = r2_score(y_true=y_test, y_pred=y_pred_forest)

print(f"Random Forest Regressor - Mean Squared Error: {mse_forest}")
print(f"Random Forest Regressor - R-squared: {r2_forest}")


In [None]:
# Inspect the random forest's predictions for the test rows
y_pred_forest

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Fit a seeded gradient-boosting model (100 stages, learning rate 0.1)
gboost_model = GradientBoostingRegressor(random_state=42, learning_rate=0.1, n_estimators=100)
gboost_model.fit(X_train, y_train)

# Predict the held-out rows and score them
y_pred_gboost = gboost_model.predict(X_test)
mse_gboost = mean_squared_error(y_true=y_test, y_pred=y_pred_gboost)
r2_gboost = r2_score(y_true=y_test, y_pred=y_pred_gboost)

print(f"Gradient Boosting Regressor - Mean Squared Error: {mse_gboost}")
print(f"Gradient Boosting Regressor - R-squared: {r2_gboost}")


In [None]:
# Inspect the gradient-boosting predictions for the test rows
y_pred_gboost

In [None]:
# Collect the test-split metrics of the four models, keyed by model label
model_labels = (
    'Linear Regression',
    'Decision Tree Regressor',
    'Random Forest Regressor',
    'Gradient Boosting Regressor',
)

mse_values = dict(zip(model_labels, (mse, mse_tree, mse_forest, mse_gboost)))
r2_values = dict(zip(model_labels, (r2, r2_tree, r2_forest, r2_gboost)))


In [None]:
# Print both metric dictionaries, each prefixed with its variable name
for dict_name, metric_dict in (("mse_values", mse_values), ("r2_values", r2_values)):
    print(dict_name, metric_dict)

In [None]:
# One bar chart per metric, comparing the fitted models on the test split.
# The palette holds exactly one colour per model (the original list carried a
# fifth, unused colour), matching the cross-validation plots further down.
bar_colors = ['blue', 'green', 'red', 'purple']

for metric_axis_label, metric_title, metric_by_model in (
    ('Mean Squared Error (MSE)', 'Comparison of Models by MSE', mse_values),
    ('R-squared', 'Comparison of Models by R-squared', r2_values),
):
    plt.figure(figsize=(10, 5))
    plt.bar(list(metric_by_model.keys()), list(metric_by_model.values()), color=bar_colors)
    plt.xlabel('Models')
    plt.ylabel(metric_axis_label)
    plt.title(metric_title)
    plt.xticks(rotation=45)  # tilt the long model names so they do not overlap
    plt.show()


In [None]:
from sklearn.model_selection import cross_val_score, cross_validate

# Number of folds for cross-validation
k = 5

# Models to evaluate (same hyper-parameters and seeds as the single-split runs)
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree Regressor': DecisionTreeRegressor(random_state=42),
    'Random Forest Regressor': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting Regressor': GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
}

# Mean cross-validated score per model
cv_results_mse = {}
cv_results_r2 = {}

# Loop variables are deliberately NOT called `model`, `mse` or `r2`: those names
# hold the fitted linear model and its test metrics from earlier cells, and
# rebinding them here would silently change what those cells see on a re-run.
for name, estimator in models.items():
    # A single cross_validate call scores both metrics on the same fits,
    # instead of cross-validating every estimator twice.
    fold_scores = cross_validate(
        estimator, X, y, cv=k, scoring=('neg_mean_squared_error', 'r2')
    )
    # sklearn reports MSE negated so that "greater is better"; flip the sign back
    cv_results_mse[name] = -fold_scores['test_neg_mean_squared_error'].mean()
    cv_results_r2[name] = fold_scores['test_r2'].mean()

# Display cross-validation results
print("Cross-Validation Results (Mean Squared Error):")
for name, cv_mse in cv_results_mse.items():
    print(f"{name}: {cv_mse}")

print("\nCross-Validation Results (R-squared):")
for name, cv_r2 in cv_results_r2.items():
    print(f"{name}: {cv_r2}")


In [None]:
# Draw one bar chart per cross-validated metric, MSE first and then R-squared
cv_palette = ['blue', 'green', 'red', 'purple']

for cv_axis_label, cv_title, cv_scores in (
    ('Mean Squared Error (MSE)', 'Cross-Validation Comparison of Models by MSE', cv_results_mse),
    ('R-squared', 'Cross-Validation Comparison of Models by R-squared', cv_results_r2),
):
    plt.figure(figsize=(10, 5))
    plt.bar(cv_scores.keys(), cv_scores.values(), color=cv_palette)
    plt.xlabel('Models')
    plt.ylabel(cv_axis_label)
    plt.title(cv_title)
    plt.xticks(rotation=45)
    plt.show()


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# --- Random Forest -----------------------------------------------------------
# Hyper-parameter values to try for the forest
param_grid_rf = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Exhaustive 5-fold search on the training split, ranked by negated MSE
grid_search_rf = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid_rf,
    scoring='neg_mean_squared_error',
    cv=5,
)
grid_search_rf.fit(X_train, y_train)

# Winning combination and its score (sign flipped back to a plain MSE)
print(f"Best parameters for Random Forest: {grid_search_rf.best_params_}")
print(f"Best cross-validation score (MSE): {-grid_search_rf.best_score_}")

# --- Gradient Boosting -------------------------------------------------------
from sklearn.ensemble import GradientBoostingRegressor

# Hyper-parameter values to try for gradient boosting
param_grid_gb = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
)

# Same search set-up as for the forest
grid_search_gb = GridSearchCV(
    estimator=GradientBoostingRegressor(random_state=42),
    param_grid=param_grid_gb,
    scoring='neg_mean_squared_error',
    cv=5,
)
grid_search_gb.fit(X_train, y_train)

# Winning combination and its score (sign flipped back to a plain MSE)
print(f"Best parameters for Gradient Boosting: {grid_search_gb.best_params_}")
print(f"Best cross-validation score (MSE): {-grid_search_gb.best_score_}")


In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Default-configured estimators, keyed by display name. A dedicated dict is used
# so this cell does not rebind `model` / `models` from earlier cells, and the
# stochastic estimators are seeded so the printed numbers are reproducible.
baseline_models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Gradient Boosting Regressor': GradientBoostingRegressor(random_state=42),
}

# Fit each estimator on the training split and report its test-split errors.
# The metrics are plain error values / a coefficient, not percentages, so no
# "%" suffix is printed.
for model_name, estimator in baseline_models.items():
    test_predictions = estimator.fit(X_train, y_train).predict(X_test)
    test_mse = mean_squared_error(y_test, test_predictions)
    print("Model for", model_name)
    print("MAE: ", np.round(mean_absolute_error(y_test, test_predictions), 5))
    print("MSE: ", np.round(test_mse, 5))
    print("RMSE: ", np.round(np.sqrt(test_mse), 5))
    print("R²: ", np.round(r2_score(y_test, test_predictions), 5))


In [None]:
# Get the feature names seen during fitting.
# These come from X_train, not df: df still contains the target column
# FinalGrade, which is not a model input.
feature_names = X_train.columns
print("Feature names in the original training data:")
print(feature_names)


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import pickle

# Standardise the training features, fit a linear model on them, and persist
# both objects so later cells can reload the pair for prediction.
# Dedicated names (std_scaler / std_model) are used so this cell does not rebind
# `scaler` (the MinMaxScaler fitted earlier) or `model` (the first linear model
# that the evaluation cell above calls predict on).
# NOTE(review): X_train was already min-max scaled earlier in the notebook, so
# the saved pipeline expects inputs on that 0-1 scale — confirm callers comply.

# Step 1: Fit the scaler on the training features and transform them
std_scaler = StandardScaler()
X_train_scaled = std_scaler.fit_transform(X_train)

# Step 2: Train the model using the scaled data
std_model = LinearRegression()
std_model.fit(X_train_scaled, y_train)

# Step 3: Save the model and scaler to pickle files
with open('linear_regression_model.pkl', 'wb') as model_file:
    pickle.dump(std_model, model_file)

with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(std_scaler, scaler_file)


In [None]:
# Read the persisted scaler and estimator back from disk
with open('scaler.pkl', 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)

with open('linear_regression_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Push the test split through the reloaded scaler, then through the reloaded model
X_test_scaled = loaded_scaler.transform(X_test)
predictions = loaded_model.predict(X_test_scaled)

# Confirm what kind of estimator came back from the pickle
print(f"Model type: {type(loaded_model)}")


In [None]:
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler

# Reload the persisted scaler and estimator
with open('scaler.pkl', 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)

with open('linear_regression_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# One hand-written student record, keyed by the training feature names.
# NOTE(review): these are raw-scale numbers, while the scaler was fitted on
# X_train after the notebook min-max scaled it to 0-1 — confirm the intended
# input scale. The category codes (Gender, ParentalSupport) come from
# LabelEncoder upstream; verify which label each integer stands for.
sample_record = {
    'Gender': 1,
    'AttendanceRate': 91,
    'StudyHoursPerWeek': 20,
    'PreviousGrade': 85,
    'ExtracurricularActivities': 80,
    'ParentalSupport': 3,
}
data = pd.DataFrame([sample_record])

# Scale the record with the reloaded scaler, then predict with the reloaded model
scaled_data = loaded_scaler.transform(data)
prediction = loaded_model.predict(scaled_data)

# Show the predicted value, followed by the fitted parameters
print(f"Prediction: {prediction[0]}")
print("Intercept:", loaded_model.intercept_)
print("Coefficients:", loaded_model.coef_)


In [None]:
import pandas as pd
import pickle

# Reload the persisted scaler and estimator
with open('scaler.pkl', 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)

with open('linear_regression_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# A second hand-written record with different values, same feature names.
# NOTE(review): values are on the raw scale, whereas the scaler was fitted on
# min-max scaled training features — confirm the intended input scale.
sample_record = {
    'Gender': 0,
    'AttendanceRate': 85,
    'StudyHoursPerWeek': 15,
    'PreviousGrade': 75,
    'ExtracurricularActivities': 60,
    'ParentalSupport': 2,
}
data = pd.DataFrame([sample_record])

# Scale the record with the reloaded scaler, then predict with the reloaded model
scaled_data = loaded_scaler.transform(data)
prediction = loaded_model.predict(scaled_data)

# Show the predicted value, followed by the fitted parameters
print(f"Prediction for input data: {prediction[0]}")
print("Intercept:", loaded_model.intercept_)
print("Coefficients:", loaded_model.coef_)


In [None]:
# Interactive prediction: ask for one value per feature (on the 0-1 scale the
# prompts describe), then run it through the scaler and model loaded in the
# previous cell.
# Prompts are keyed by feature name and asked in the training column order.
feature_prompts = {
    'Gender': "Enter Gender (0 for Female, 1 for Male): ",
    'AttendanceRate': "Enter Attendance Rate (between 0 and 1): ",
    'StudyHoursPerWeek': "Enter Study Hours Per Week (between 0 and 1): ",
    'PreviousGrade': "Enter Previous Grade (between 0 and 1): ",
    'ExtracurricularActivities': "Enter Extracurricular Activities Score (between 0 and 1): ",
    'ParentalSupport': "Enter Parental Support Level (between 0 and 1): ",
}

# float() raises ValueError on non-numeric input; that is left to surface as-is
answers = {feature: float(input(prompt)) for feature, prompt in feature_prompts.items()}

# Prepare the user input as a single-row DataFrame
user_input = pd.DataFrame([answers])

# Step 1: Scale the input data using the loaded scaler
scaled_data = loaded_scaler.transform(user_input)

# Step 2: Perform prediction using the loaded model
prediction = loaded_model.predict(scaled_data)

# Output the prediction once (the original printed the same value twice)
print(f"Prediction for the student's Final Grade: {prediction[0]:.2f}")

In [None]:
import pickle
import pandas as pd

# Reload the persisted scaler and estimator
with open('scaler.pkl', 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)

with open('linear_regression_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Ask for one value per feature, in training column order.
# NOTE(review): the prompts ask for raw-scale values, while the scaler was
# fitted on min-max scaled training features — confirm the intended scale.
(
    gender,
    attendance_rate,
    study_hours,
    previous_grade,
    extracurricular_activities,
    parental_support,
) = (
    float(input(prompt))
    for prompt in (
        "Enter Gender (0 for Female, 1 for Male): ",
        "Enter Attendance Rate (between 0 and 100): ",
        "Enter Study Hours Per Week (between 0 and 168): ",
        "Enter Previous Grade (between 0 and 100): ",
        "Enter Extracurricular Activities Score (between 0 and 100): ",
        "Enter Parental Support Level (between 1 and 5): ",
    )
)

# Assemble the answers into a single-row frame keyed by feature name
user_input = pd.DataFrame([{
    'Gender': gender,
    'AttendanceRate': attendance_rate,
    'StudyHoursPerWeek': study_hours,
    'PreviousGrade': previous_grade,
    'ExtracurricularActivities': extracurricular_activities,
    'ParentalSupport': parental_support,
}])

# Scale, predict, report
scaled_data = loaded_scaler.transform(user_input)
prediction = loaded_model.predict(scaled_data)
print(f"Prediction for the student's Final Grade: {prediction[0]:.2f}")


In [None]:
# Show the fitted intercept followed by the per-feature coefficients
for caption, fitted_value in (
    ("Intercept (β0):", loaded_model.intercept_),
    ("Coefficients (β1, β2, ..., βn):", loaded_model.coef_),
):
    print(caption, fitted_value)


In [None]:
import numpy as np
import pandas as pd
import pickle

# Load the trained model and scaler from pickle files
with open('linear_regression_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

with open('scaler.pkl', 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)

# Column order the values below are written in.
# NOTE(review): presumably identical to X_train.columns — verify against the CSV.
feature_order = [
    'Gender',
    'AttendanceRate',
    'StudyHoursPerWeek',
    'PreviousGrade',
    'ExtracurricularActivities',
    'ParentalSupport',
]

# Provide input as a 2D NumPy array (one row for a single prediction)
input_data = np.array([[0, 85, 15, 75, 60, 2]])

# The scaler was fitted on a DataFrame, so give it named columns as well: a
# bare ndarray triggers sklearn's "X does not have valid feature names" warning
# and relies on positional order alone.
input_frame = pd.DataFrame(input_data, columns=feature_order)

# Step 1: Scale the input data using the loaded scaler
scaled_input = loaded_scaler.transform(input_frame)

# Step 2: Perform prediction using the loaded model
prediction = loaded_model.predict(scaled_input)

# Output the prediction
print(f"Prediction for the student's Final Grade: {prediction[0]:.2f}")

In [None]:
import json
import pandas as pd

# A JSON payload standing in for an API request body
input_json = '''
{
    "Gender": 0,
    "AttendanceRate": 85,
    "StudyHoursPerWeek": 15,
    "PreviousGrade": 75,
    "ExtracurricularActivities": 60,
    "ParentalSupport": 2
}
'''

# Parse the payload and wrap the resulting dict as a single-row frame
input_dict = json.loads(input_json)
input_data = pd.DataFrame(data=[input_dict])

# Scale with the scaler loaded in an earlier cell, then predict with the
# model loaded alongside it
scaled_input = loaded_scaler.transform(input_data)
prediction = loaded_model.predict(scaled_input)

# Report the predicted grade to two decimals
print(f"Prediction for the student's Final Grade: {prediction[0]:.2f}")


In [None]:
import json
import pandas as pd

# A second JSON payload standing in for an API request body
input_json = '''
{
    "Gender": 1,
    "AttendanceRate": 95,
    "StudyHoursPerWeek": 30,
    "PreviousGrade": 90,
    "ExtracurricularActivities": 85,
    "ParentalSupport": 5
}

'''

# Parse the payload and wrap the resulting dict as a single-row frame
input_dict = json.loads(input_json)
input_data = pd.DataFrame(data=[input_dict])

# Scale with the scaler loaded in an earlier cell, then predict with the
# model loaded alongside it
scaled_input = loaded_scaler.transform(input_data)
prediction = loaded_model.predict(scaled_input)

# Report the predicted grade to two decimals
print(f"Prediction for the student's Final Grade: {prediction[0]:.2f}")


### Updated Code with MinMaxScaler

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
import pickle

# Requires X_train and y_train from the train/test split cell.

# Fit a min-max scaler on the training features and transform them
minmax_scaler = MinMaxScaler()
X_train_scaled = minmax_scaler.fit_transform(X_train)

# Fit a linear regression on the rescaled features
regressor = LinearRegression()
regressor.fit(X_train_scaled, y_train)

# Persist the estimator and the scaler, each to its own pickle file
for artifact, artifact_path in (
    (regressor, 'minmax_linear_regression_model.pkl'),
    (minmax_scaler, 'minmax_scaler.pkl'),
):
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)

print("Model and scaler have been saved successfully.")


In [None]:
import json
import pandas as pd
import pickle

# Reload the min-max scaler and the estimator trained on its output
with open('minmax_scaler.pkl', 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)

with open('minmax_linear_regression_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# A JSON payload standing in for an API request body
input_json = '''
{
    "Gender": 1,
    "AttendanceRate": 90,
    "StudyHoursPerWeek": 20,
    "PreviousGrade": 85,
    "ExtracurricularActivities": 80,
    "ParentalSupport": 3
}
'''

# Parse the payload and wrap the resulting dict as a single-row frame
input_dict = json.loads(input_json)
input_data = pd.DataFrame(data=[input_dict])

# Scale the row, then predict from the scaled values
scaled_input = loaded_scaler.transform(input_data)
prediction = loaded_model.predict(scaled_input)

# Report the predicted grade to two decimals
print(f"Prediction for the student's Final Grade: {prediction[0]:.2f}")


In [None]:
import json
import pandas as pd
import pickle

# Reload the min-max scaler and the estimator trained on its output
with open('minmax_scaler.pkl', 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)

with open('minmax_linear_regression_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# A JSON payload standing in for an API request body
input_json = '''
{
    "Gender": 0,
    "AttendanceRate": 85,
    "StudyHoursPerWeek": 15,
    "PreviousGrade": 75,
    "ExtracurricularActivities": 60,
    "ParentalSupport": 2
}
'''

# Parse the payload and wrap the resulting dict as a single-row frame
input_dict = json.loads(input_json)
input_data = pd.DataFrame(data=[input_dict])

# Scale the row, then predict from the scaled values
scaled_input = loaded_scaler.transform(input_data)
prediction = loaded_model.predict(scaled_input)

# Report the predicted grade to two decimals
print(f"Prediction for the student's Final Grade: {prediction[0]:.2f}")


In [None]:
import json
import pandas as pd
import pickle

# Reload the min-max scaler and the estimator trained on its output
with open('minmax_scaler.pkl', 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)

with open('minmax_linear_regression_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# A JSON payload standing in for an API request body
input_json = '''
{
    "Gender": 1,
    "AttendanceRate": 95,
    "StudyHoursPerWeek": 30,
    "PreviousGrade": 90,
    "ExtracurricularActivities": 85,
    "ParentalSupport": 5
}


'''

# Parse the payload and wrap the resulting dict as a single-row frame
input_dict = json.loads(input_json)
input_data = pd.DataFrame(data=[input_dict])

# Scale the row, then predict from the scaled values
scaled_input = loaded_scaler.transform(input_data)
prediction = loaded_model.predict(scaled_input)

# Report the predicted grade to two decimals
print(f"Prediction for the student's Final Grade: {prediction[0]:.2f}")
