In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score
from sklearn import metrics
from sklearn.metrics import roc_curve, auc, roc_auc_score
import warnings
warnings.filterwarnings("ignore")

Importing the data

In [2]:
data = pd.read_csv('Student Mental health.csv')
data.head()

Unnamed: 0,Timestamp,Choose your gender,Age,What is your course?,Your current year of Study,What is your CGPA?,Marital status,Do you have Depression?,Do you have Anxiety?,Do you have Panic attack?,Did you seek any specialist for a treatment?
0,8/7/2020 12:02,Female,18.0,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No
1,8/7/2020 12:04,Male,21.0,Islamic education,year 2,3.00 - 3.49,No,No,Yes,No,No
2,8/7/2020 12:05,Male,19.0,BIT,Year 1,3.00 - 3.49,No,Yes,Yes,Yes,No
3,8/7/2020 12:06,Female,22.0,Laws,year 3,3.00 - 3.49,Yes,Yes,No,No,No
4,8/7/2020 12:13,Male,23.0,Mathemathics,year 4,3.00 - 3.49,No,No,No,No,No


In [3]:
#Information on features
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101 entries, 0 to 100
Data columns (total 11 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   Timestamp                                     101 non-null    object 
 1   Choose your gender                            101 non-null    object 
 2   Age                                           100 non-null    float64
 3   What is your course?                          101 non-null    object 
 4   Your current year of Study                    101 non-null    object 
 5   What is your CGPA?                            101 non-null    object 
 6   Marital status                                101 non-null    object 
 7   Do you have Depression?                       101 non-null    object 
 8   Do you have Anxiety?                          101 non-null    object 
 9   Do you have Panic attack?                     101 non-null    obj

In [4]:
#drop NA value
data= data[data['Age'].notna()]
data.shape

(100, 11)

In [5]:
data.head(1)

Unnamed: 0,Timestamp,Choose your gender,Age,What is your course?,Your current year of Study,What is your CGPA?,Marital status,Do you have Depression?,Do you have Anxiety?,Do you have Panic attack?,Did you seek any specialist for a treatment?
0,8/7/2020 12:02,Female,18.0,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No


In [6]:
data.columns = ['Timestamp', 'Gender', 'Age', 'Course', 'Year', 'CGPA', 'Marital_Status', 'Depression', 'Anxiety', 'Panic_Attack', 'Treatment']
data.head(1)

Unnamed: 0,Timestamp,Gender,Age,Course,Year,CGPA,Marital_Status,Depression,Anxiety,Panic_Attack,Treatment
0,8/7/2020 12:02,Female,18.0,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No


In [7]:
data

Unnamed: 0,Timestamp,Gender,Age,Course,Year,CGPA,Marital_Status,Depression,Anxiety,Panic_Attack,Treatment
0,8/7/2020 12:02,Female,18.0,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No
1,8/7/2020 12:04,Male,21.0,Islamic education,year 2,3.00 - 3.49,No,No,Yes,No,No
2,8/7/2020 12:05,Male,19.0,BIT,Year 1,3.00 - 3.49,No,Yes,Yes,Yes,No
3,8/7/2020 12:06,Female,22.0,Laws,year 3,3.00 - 3.49,Yes,Yes,No,No,No
4,8/7/2020 12:13,Male,23.0,Mathemathics,year 4,3.00 - 3.49,No,No,No,No,No
...,...,...,...,...,...,...,...,...,...,...,...
96,13/07/2020 19:56:49,Female,21.0,BCS,year 1,3.50 - 4.00,No,No,Yes,No,No
97,13/07/2020 21:21:42,Male,18.0,Engineering,Year 2,3.00 - 3.49,No,Yes,Yes,No,No
98,13/07/2020 21:22:56,Female,19.0,Nursing,Year 3,3.50 - 4.00,Yes,Yes,No,Yes,No
99,13/07/2020 21:23:57,Female,23.0,Pendidikan Islam,year 4,3.50 - 4.00,No,No,No,No,No


In [8]:
#Formatting the Year column
data['Year'].unique()
# array(['year 1', 'year 2', 'Year 1', 'year 3', 'year 4', 'Year 2', 'Year 3'], dtype=object)

array(['year 1', 'year 2', 'Year 1', 'year 3', 'year 4', 'Year 2',
       'Year 3'], dtype=object)

In [9]:
# def Clean(Text):
#     Text = Text[-1]
#     Text = int(Text)
#     return Text
# data["Year"] = data["Year"].apply(Clean)
# print("First three values of Year after cleaning text:")
# print(data["Year"][:3], "\n")

In [10]:
data['CGPA'].unique()

array(['3.00 - 3.49', '3.50 - 4.00', '3.50 - 4.00 ', '2.50 - 2.99',
       '2.00 - 2.49', '0 - 1.99'], dtype=object)

In [11]:
def remove_space(string):
    string = string.strip()
    return string
data["CGPA"] = data["CGPA"].apply(remove_space)
print("First three values of CGPA after cleaning text:")
print(data["CGPA"][:3], "\n")
print(data['CGPA'].unique())

First three values of CGPA after cleaning text:
0    3.00 - 3.49
1    3.00 - 3.49
2    3.00 - 3.49
Name: CGPA, dtype: object 

['3.00 - 3.49' '3.50 - 4.00' '2.50 - 2.99' '2.00 - 2.49' '0 - 1.99']


In [12]:
len(data['Course'].unique())

49

In [13]:
#Let's replace redundant course name with the standard course name
data['Course'].replace({'engin': 'Engineering' , 'Engine':'Engineering' , 'Islamic education':'Islamic Education' , 'Pendidikan islam':'Pendidikan Islam' , 'BIT':'IT', 'psychology':'Psychology', 'koe': 'Koe', 'Kirkhs': 'Irkhs', 'KIRKHS': 'Irkhs', 'Benl': 'BENL', 'Fiqh fatwa ': 'Fiqh', 'Laws': 'Law'} , inplace = True)
len(data['Course'].unique())

37

In [14]:
# Display the current column names
print(data.columns)


Index(['Timestamp', 'Gender', 'Age', 'Course', 'Year', 'CGPA',
       'Marital_Status', 'Depression', 'Anxiety', 'Panic_Attack', 'Treatment'],
      dtype='object')


 preprocessing your data


In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Assuming your dataset is stored in a DataFrame called 'data'

# Drop the 'Timestamp' column as it's not relevant for modeling
#data = data.drop('Timestamp', axis=1)

# Handle missing values
data.dropna(inplace=True)  # Drop rows with missing values for simplicity

# Encode categorical variables using One-Hot Encoding
categorical_columns = ['Gender', 'Course', 'Marital_Status']

# Use get_dummies for one-hot encoding
data_encoded = pd.get_dummies(data, columns=categorical_columns)

# Convert 'CGPA' to a numerical format
# Assuming you want to take the average of the range
data_encoded['CGPA'] = data_encoded['CGPA'].apply(lambda x: (float(x.split()[0]) + float(x.split()[-1])) / 2 if '-' in x else float(x))

# Map yes/no responses to binary values for binary columns
binary_columns = ['Depression', 'Anxiety', 'Panic_Attack', 'Treatment']
data_encoded[binary_columns] = data_encoded[binary_columns].apply(lambda x: x.map({'Yes': 1, 'No': 0}))

# Split the data into features (X) and target variable (y)
X = data_encoded.drop('CGPA', axis=1)
y = data_encoded['CGPA']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Display the preprocessed data
print("X_train:")
print(X_train.head())

print("\nX_test:")
print(X_test.head())

print("\ny_train:")
print(y_train.head())

print("\ny_test:")
print(y_test.head())


X_train:
              Timestamp   Age    Year  Depression  Anxiety  Panic_Attack  \
56       8/7/2020 16:08  23.0  Year 3           0        0             0   
89  13/07/2020 11:54:58  24.0  Year 2           0        0             1   
26       8/7/2020 13:35  19.0  year 1           0        0             0   
42       8/7/2020 14:58  20.0  year 2           1        0             0   
70       8/7/2020 20:36  24.0  year 1           0        0             0   

    Treatment  Gender_Female  Gender_Male  Course_ALA  ...  \
56          0          False         True       False  ...   
89          0          False         True       False  ...   
26          0           True        False       False  ...   
42          0           True        False       False  ...   
70          0           True        False       False  ...   

    Course_Mathemathics  Course_Nursing   Course_Pendidikan Islam  \
56                False            False                    False   
89                False

In [16]:
# Assuming 'CGPA' is a string column containing ranges
# Convert the 'CGPA' column to numerical representation (average of the range)
data['CGPA'] = data['CGPA'].apply(lambda x: np.mean([float(val) for val in x.split('-')]))

# Display the updated 'CGPA' column
print(data['CGPA'])


0      3.245
1      3.245
2      3.245
3      3.245
4      3.245
       ...  
96     3.750
97     3.245
98     3.750
99     3.750
100    3.245
Name: CGPA, Length: 100, dtype: float64


In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score



# Separate features and target variable
X = data.drop('CGPA', axis=1)
y = data['CGPA']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define categorical and numerical features
categorical_features = ['Gender', 'Course', 'Year', 'Marital_Status', 'Depression', 'Anxiety', 'Panic_Attack', 'Treatment']
numerical_features = ['Age']

# Create transformers
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

numerical_transformer = SimpleImputer(strategy='mean')

# Combine transformers
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features),
        ('num', numerical_transformer, numerical_features)
    ])

# Create the model
model = RandomForestRegressor(random_state=42)

# Create and evaluate the pipeline
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                             ('model', model)])

pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")


Mean Squared Error: 0.37
R-squared: 0.03


• Train 5 ML models


In [18]:
# Assuming 'Year' is the column with string values like 'Year 3'

# Extract the numeric part from the 'Year' column
X_train['Year'] = X_train['Year'].str.extract('(\d+)').astype(float)
X_test['Year'] = X_test['Year'].str.extract('(\d+)').astype(float)


In [28]:
pip install xgboost


Collecting xgboost
  Downloading xgboost-2.0.2-py3-none-win_amd64.whl.metadata (2.0 kB)
Downloading xgboost-2.0.2-py3-none-win_amd64.whl (99.8 MB)
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB 145.2 kB/s eta 0:11:27
   ---------------------------------------- 0.1/99.8 MB 233.8 kB/s eta 0:07:07
   ---------------------------------------- 0.2/99.8 MB 765.3 kB/s eta 0:02:11
   ---------------------------------------- 0.4/99.8 MB 1.3 MB/s eta 0:01:15
   ---------------------------------------- 0.9/99.8 MB 2.5 MB/s eta 0:00:40
    --------------------------------------- 2.2/99.8 MB 4.9 MB/s eta 0:00:20
   - ------------------------------

In [31]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score, explained_variance_score
import numpy as np
import pandas as pd
from xgboost import XGBRegressor  # Import XGBoost

# Assuming your dataset is loaded as 'data'
# If you have a separate 'Timestamp' column, remove it
X = data.drop(['Timestamp', 'CGPA'], axis=1)  # Drop 'Timestamp' and the target variable 'CGPA'
y = data['CGPA']  # Target variable

# Perform any necessary preprocessing on X (handle categorical variables, etc.)
# For simplicity, we'll use pandas get_dummies for categorical encoding
X = pd.get_dummies(X)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train 5 ML models

# Model 1: Linear Regression
linear_reg_model = LinearRegression()
linear_reg_model.fit(X_train, y_train)

# Model 2: Decision Tree Regression
decision_tree_model = DecisionTreeRegressor()
decision_tree_model.fit(X_train, y_train)

# Model 3: Random Forest Regression
random_forest_model = RandomForestRegressor()
random_forest_model.fit(X_train, y_train)

# Model 4: Support Vector Regression
svr_model = SVR()
svr_model.fit(X_train, y_train)

# Model 5: XGBoost Regression
xgboost_model = XGBRegressor()  # Initialize XGBoost model
xgboost_model.fit(X_train, y_train)

# Evaluate the models
models = [linear_reg_model, decision_tree_model, random_forest_model, svr_model, xgboost_model]
model_names = ['Linear Regression', 'Decision Tree', 'Random Forest', 'SVR', 'XGBoost']

for model, name in zip(models, model_names):
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    explained_var = explained_variance_score(y_test, y_pred)

    print(f"\n{name} Model:")
    print(f"Mean Squared Error: {mse:.2f}")
    print(f"R-squared: {r2:.2f}")
    print(f"Explained Variance: {explained_var:.2f}")

# Note: This is a basic example, and you might need to fine-tune parameters and explore more advanced techniques based on your dataset.



Linear Regression Model:
Mean Squared Error: 40742308622703473459200.00
R-squared: -107241295358019200090112.00
Explained Variance: -95687079505471882657792.00

Decision Tree Model:
Mean Squared Error: 0.44
R-squared: -0.17
Explained Variance: -0.12

Random Forest Model:
Mean Squared Error: 0.37
R-squared: 0.02
Explained Variance: 0.03

SVR Model:
Mean Squared Error: 0.38
R-squared: 0.00
Explained Variance: 0.01

XGBoost Model:
Mean Squared Error: 0.41
R-squared: -0.09
Explained Variance: -0.08


• Try to track models performance, versions and parameters


In [20]:
pip install mlflow


Note: you may need to restart the kernel to use updated packages.


In [21]:
pip install pyngrok

Note: you may need to restart the kernel to use updated packages.


In [32]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
import mlflow
import mlflow.sklearn
import subprocess
import requests
import json
import time
from pyngrok import ngrok

# Set up your ngrok authtoken
ngrok_authtoken = "2YwjzxbGl7uRWqlQl7jDhS2RD8D_3CU9mF12mrRkh2pzHMUhk"  # Replace with your ngrok authtoken

# Set up MLflow experiment ID
experiment_id = "your_experiment_id"  # Replace with your MLflow experiment ID

# Start MLflow Tracking Server
mlflow_server_process = subprocess.Popen(["mlflow", "ui"])

# Wait for a moment to ensure the MLflow server is running
time.sleep(5)

# Set up MLflow for Ngrok tracking
mlflow.set_tracking_uri("http://127.0.0.1:5000")  # Assuming MLflow is running locally
mlflow.set_experiment(experiment_id)

# Train 5 ML models

# Model 1: Linear Regression
with mlflow.start_run():
    linear_reg_model = LinearRegression()
    linear_reg_model.fit(X_train, y_train)
    mlflow.sklearn.log_model(linear_reg_model, "model")
    mlflow.log_params({'model_type': 'Linear Regression'})

# Model 2: Decision Tree Regression
with mlflow.start_run():
    decision_tree_model = DecisionTreeRegressor()
    decision_tree_model.fit(X_train, y_train)
    mlflow.sklearn.log_model(decision_tree_model, "model")
    mlflow.log_params({'model_type': 'Decision Tree'})

# Model 3: Random Forest Regression
with mlflow.start_run():
    random_forest_model = RandomForestRegressor()
    random_forest_model.fit(X_train, y_train)
    mlflow.sklearn.log_model(random_forest_model, "model")
    mlflow.log_params({'model_type': 'Random Forest'})

# Model 4: Support Vector Regression
with mlflow.start_run():
    svr_model = SVR()
    svr_model.fit(X_train, y_train)
    mlflow.sklearn.log_model(svr_model, "model")
    mlflow.log_params({'model_type': 'SVR'})

# Model 5: Gradient Boosting Regression
with mlflow.start_run():
    gradient_boosting_model = GradientBoostingRegressor()
    gradient_boosting_model.fit(X_train, y_train)
    mlflow.sklearn.log_model(gradient_boosting_model, "model")
    mlflow.log_params({'model_type': 'Gradient Boosting'})

# Open an HTTP tunnel on port 5000 for http://localhost:5000
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

# Sleep for 10 seconds (you can adjust the duration as needed)
time.sleep(10)

# Stop MLflow and Ngrok processes
mlflow_server_process.terminate()
ngrok.kill()


t=2023-12-02T19:19:50+0100 lvl=warn msg="ngrok config file found at legacy location, move to XDG location" xdg_path="C:\\Users\\ASUS ZENBOOK\\AppData\\Local/ngrok/ngrok.yml" legacy_path="C:\\Users\\ASUS ZENBOOK\\.ngrok2\\ngrok.yml"
t=2023-12-02T19:19:51+0100 lvl=eror msg="failed to reconnect session" obj=tunnels.session obj=csess id=07034fb76454 err="authentication failed: The authtoken you specified is properly formed, but it is invalid.\nYour authtoken: 2S760yr3DdcFN5KMjQFSlEZDF6T_65sMZt9dUuhow53hSP3cP\nThis usually happens when:\n    - You reset your authtoken\n    - Your authtoken was for a team account that you were removed from\n    - You are using ngrok link and this credential was explicitly revoked\nGo to your ngrok dashboard and double check that your authtoken is correct:\nhttps://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_107\r\n"
t=2023-12-02T19:19:51+0100 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: The authtoken you 

PyngrokNgrokError: The ngrok process errored on start: authentication failed: The authtoken you specified is properly formed, but it is invalid.\nYour authtoken: 2S760yr3DdcFN5KMjQFSlEZDF6T_65sMZt9dUuhow53hSP3cP\nThis usually happens when:\n    - You reset your authtoken\n    - Your authtoken was for a team account that you were removed from\n    - You are using ngrok link and this credential was explicitly revoked\nGo to your ngrok dashboard and double check that your authtoken is correct:\nhttps://dashboard.ngrok.com/get-started/your-authtoken\r\n\r\nERR_NGROK_107\r\n.

In [23]:
pip install skl2onnx

Note: you may need to restart the kernel to use updated packages.


Save the best model in ONNX format and its dedicated preprocessing transformations
(i.e., using transformers API) in pickle format

In [33]:
import onnx
import pickle
from sklearn.linear_model import LinearRegression  # Replace with your best model
from sklearn.preprocessing import StandardScaler  # Replace with your preprocessing transformer
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Assume you have a trained model named 'best_model'
best_model = LinearRegression()  # Replace with your actual best model
best_model.fit(X_train, y_train)  # Train the model on your training data

# Convert the best model to ONNX format
initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))]
onnx_model = convert_sklearn(best_model, initial_types=initial_type)

# Save the ONNX model to a file
onnx.save_model(onnx_model, "best_model.onnx")

# Assume you have a preprocessing transformer named 'preprocessing_transformer'
preprocessing_transformer = StandardScaler()  # Replace with your actual preprocessing transformer
preprocessing_transformer.fit(X_train)  # Fit the transformer on your training data

# Save the preprocessing transformer using pickle
with open("preprocessing_transformer.pkl", "wb") as transformer_file:
    pickle.dump(preprocessing_transformer, transformer_file)


In [34]:
from sklearn.linear_model import LinearRegression
import joblib

# Assuming your dataset is loaded as 'data'
# If you have a separate 'Timestamp' column, remove it
X = data.drop(['Timestamp', 'CGPA'], axis=1)  # Drop 'Timestamp' and the target variable 'CGPA'
y = data['CGPA']  # Target variable

# Perform any necessary preprocessing on X (handle categorical variables, etc.)
# For simplicity, we'll use pandas get_dummies for categorical encoding
X = pd.get_dummies(X)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Linear Regression model
linear_reg_model = LinearRegression()
linear_reg_model.fit(X_train, y_train)

# Save the Linear Regression model as .pkl
joblib.dump(linear_reg_model, 'linear_regression_model.pkl')


['linear_regression_model.pkl']

Using FastAP

In [25]:
pip install fastapi


Note: you may need to restart the kernel to use updated packages.


In [None]:
data

Unnamed: 0,Timestamp,Gender,Age,Course,Year,CGPA,Marital_Status,Depression,Anxiety,Panic_Attack,Treatment
0,8/7/2020 12:02,Female,18.0,Engineering,year 1,3.245,No,Yes,No,Yes,No
1,8/7/2020 12:04,Male,21.0,Islamic Education,year 2,3.245,No,No,Yes,No,No
2,8/7/2020 12:05,Male,19.0,IT,Year 1,3.245,No,Yes,Yes,Yes,No
3,8/7/2020 12:06,Female,22.0,Law,year 3,3.245,Yes,Yes,No,No,No
4,8/7/2020 12:13,Male,23.0,Mathemathics,year 4,3.245,No,No,No,No,No
...,...,...,...,...,...,...,...,...,...,...,...
96,13/07/2020 19:56:49,Female,21.0,BCS,year 1,3.750,No,No,Yes,No,No
97,13/07/2020 21:21:42,Male,18.0,Engineering,Year 2,3.245,No,Yes,Yes,No,No
98,13/07/2020 21:22:56,Female,19.0,Nursing,Year 3,3.750,Yes,Yes,No,Yes,No
99,13/07/2020 21:23:57,Female,23.0,Pendidikan Islam,year 4,3.750,No,No,No,No,No


In [None]:
# from fastapi import FastAPI, Request, Form
# from fastapi.templating import Jinja2Templates
# import uvicorn
# import pickle
# import onnxruntime
# from typing import Annotated  # Remove 'Doc' import

# import numpy as np
# import pandas as pd

# # Load the ONNX model
# onnx_model = onnxruntime.InferenceSession("best_model.onnx")

# # Load the preprocessing transformer
# with open("preprocessing_transformer.pkl", "rb") as transformer_file:
#     preprocessing_transformer = pickle.load(transformer_file)

# # Load your dataset
# dataset = pd.read_csv("your_dataset.csv")  # Replace with your dataset path

# app = FastAPI()
# templates = Jinja2Templates(directory="templates")

# class InputData:
#     def __init__(self, age: float, depression: float, anxiety: float, panic_attack: float):
#         self.age = age
#         self.depression = depression
#         self.anxiety = anxiety
#         self.panic_attack = panic_attack

# @app.get("/")
# def index(request: Request):
#     return templates.TemplateResponse("index.html", {"request": request})

# @app.post('/predict')
# async def predict(request: Request, age: float = Form(...), depression: float = Form(...), anxiety: float = Form(...), panic_attack: float = Form(...)):
#     try:
#         # Create an InputData instance
#         input_data = InputData(age=age, depression=depression, anxiety=anxiety, panic_attack=panic_attack)

#         # Create a DataFrame with the input features
#         input_df = pd.DataFrame([[input_data.age, input_data.depression, input_data.anxiety, input_data.panic_attack]],
#                                 columns=["Age", "Depression", "Anxiety", "Panic_Attack"])

#         # Apply preprocessing to the input features
#         preprocessed_data = preprocessing_transformer.transform(input_df)

#         # Make a prediction using the ONNX model
#         prediction = onnx_model.run(None, {"float_input": preprocessed_data.astype(np.float32)})

#         # Extract the prediction from the ONNX output
#         result = prediction[0][0]

#         prediction_text = f"The predicted CGPA is: {result:.2f}"

#         return templates.TemplateResponse("index.html", {"request": request, "prediction": prediction_text})
#     except Exception as e:
#         return templates.TemplateResponse("index.html", {"request": request, "prediction": f"Error: {str(e)}"})

# if __name__ == '__main__':
#     uvicorn.run(app)


In [26]:
pip install uvicorn


Note: you may need to restart the kernel to use updated packages.


In [None]:
data

Unnamed: 0,Gender,Age,Course,Year,CGPA,Marital_Status,Depression,Anxiety,Panic_Attack,Treatment
0,Female,18.0,Engineering,year 1,3.00 - 3.49,No,Yes,No,Yes,No
1,Male,21.0,Islamic Education,year 2,3.00 - 3.49,No,No,Yes,No,No
2,Male,19.0,IT,Year 1,3.00 - 3.49,No,Yes,Yes,Yes,No
3,Female,22.0,Law,year 3,3.00 - 3.49,Yes,Yes,No,No,No
4,Male,23.0,Mathemathics,year 4,3.00 - 3.49,No,No,No,No,No
...,...,...,...,...,...,...,...,...,...,...
96,Female,21.0,BCS,year 1,3.50 - 4.00,No,No,Yes,No,No
97,Male,18.0,Engineering,Year 2,3.00 - 3.49,No,Yes,Yes,No,No
98,Female,19.0,Nursing,Year 3,3.50 - 4.00,Yes,Yes,No,Yes,No
99,Female,23.0,Pendidikan Islam,year 4,3.50 - 4.00,No,No,No,No,No
