In [1]:
import pickle
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score, accuracy_score

In [5]:
def assign_weight(days):
    """Map a session's age in days to its recency weight.

    Sessions at most 7 days old weigh 1.0, at most 14 days 0.75, at most
    21 days 0.5, and everything else 0.25.

    Parameters
    ----------
    days : number
        Age of the session in days, compared with ``<=`` against each tier.

    Returns
    -------
    float
        One of 1.0, 0.75, 0.5 or 0.25.
    """
    # (inclusive upper bound in days, weight), checked from newest to oldest.
    recency_tiers = ((7, 1.0), (14, 0.75), (21, 0.5))
    for max_age, weight in recency_tiers:
        if days <= max_age:
            return weight
    # Anything that matched no tier falls through to the lowest weight.
    return 0.25

def updated_df(filepath):
    """Load one focus-session log and add recency-weighted columns.

    The file is read with its first column as the index. The following
    columns are then added to the frame:

    * ``Days Ago`` - whole days between each row's ``Date`` and the latest
      ``Date`` in the same file.
    * ``Weight`` - ``assign_weight(Days Ago)``.
    * ``Weighted Focus Score`` / ``Weighted Productivity`` - the
      ``Focus Score (0-10)`` and ``Productivity %`` columns multiplied by
      ``Weight``.
    * ``Hour`` - hour component of ``Start Time``.

    Parameters
    ----------
    filepath : str
        Path to a ``.csv`` or ``.xlsx`` file (extension is matched
        case-insensitively). CSV dates are parsed as ``%d-%m-%Y``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with the columns above added.

    Raises
    ------
    ValueError
        If the extension is neither ``csv`` nor ``xlsx``, or if
        ``Start Time`` matches neither ``%H:%M:%S`` nor ``%H:%M``.
    """
    # Use the text after the LAST dot so that "./relative" paths and dotted
    # directory names do not confuse the extension check.
    _, dot, suffix = filepath.rpartition(".")
    ext = suffix.lower() if dot else ""

    if ext == 'xlsx':
        df = pd.read_excel(filepath, index_col=0)
        df['Date'] = pd.to_datetime(df['Date'])
    elif ext == 'csv':
        df = pd.read_csv(filepath, index_col=0)
        df['Date'] = pd.to_datetime(df['Date'], format="%d-%m-%Y")
    else:
        # Fail with a clear message instead of hitting an unbound `df` below.
        raise ValueError(
            f"Unsupported file type for {filepath!r}: expected a .csv or .xlsx file"
        )

    # Recency is measured relative to the newest row of this file, not "today".
    last_date = df['Date'].max()
    df['Days Ago'] = (last_date - df['Date']).dt.days
    df['Weight'] = df['Days Ago'].apply(assign_weight)

    # Weighted focus score and productivity
    df['Weighted Focus Score'] = df['Focus Score (0-10)'] * df['Weight']
    df['Weighted Productivity'] = df['Productivity %'] * df['Weight']

    # Start times come either with or without seconds; try the longer format
    # first and fall back only on a parse failure so other errors surface.
    try:
        df["Hour"] = pd.to_datetime(df["Start Time"], format="%H:%M:%S").dt.hour
    except ValueError:
        df["Hour"] = pd.to_datetime(df["Start Time"], format="%H:%M").dt.hour

    # The per-(Day, Hour) weighted averages that used to be built here were
    # never returned or stored, so that dead computation has been dropped.
    return df

In [6]:
# Load every per-student log through updated_df, in the same order as before,
# and bind the results to df1 .. df11 (those names are used by later cells).
(
    df1, df2, df3, df4, df5, df6,
    df7, df8, df9, df10, df11,
) = (
    updated_df(source_path)
    for source_path in (
        "dataset/student1.csv",
        "dataset/student2.csv",
        "dataset/student3.csv",
        "dataset/student4.csv",
        "dataset/student5.csv",
        "dataset/student6.csv",
        "dataset/student7.csv",
        "dataset/student8.csv",
        "dataset/student9.csv",
        "dataset/focus.xlsx",
        "dataset/focus_data.csv",
    )
)

In [17]:
# Stack all per-file frames into one table with a fresh 0..n-1 row index.
dfs = [
    df1, df2, df3, df4, df5, df6,
    df7, df8, df9, df10, df11,
]
combined_df = pd.concat(dfs, ignore_index=True)

In [18]:
# Replace every missing value, in every column, with 0.
combined_df = combined_df.fillna(value=0)

# BEST DAY

In [19]:
# Work on an independent copy so combined_df itself is not modified below.
df = combined_df.copy(deep=True)

In [20]:
# Ordinal weekday codes: Monday=0 through Sunday=6.
days = {
    'Monday': 0,
    'Tuesday': 1,
    'Wednesday': 2,
    'Thursday': 3,
    'Friday': 4,
    'Saturday': 5,
    'Sunday': 6,
}
# Day values that are not keys of `days` become NaN here.
df['Day_encoded'] = df['Day'].map(days)

# Equal-weight blend of the two raw scores.
# NOTE(review): going by the column names, focus is on a 0-10 scale while
# productivity is a percentage, so productivity likely dominates this sum -
# confirm that is intended.
df['combined_score'] = 0.5 * df["Focus Score (0-10)"] + 0.5 * df['Productivity %']

# Split sessions into three equal-sized bins by combined score.
df['Productivity_Label'] = pd.qcut(
    df['combined_score'],
    q=3,
    labels=['Low', 'Mid', 'High'],
)
df

Unnamed: 0,Date,Day,Start Time,End Time,Task Type,App Switch Count,Distraction Duration (mins),Total Focus Duration (mins),Focus Score (0-10),Productivity %,Days Ago,Weight,Weighted Focus Score,Weighted Productivity,Hour,Day_encoded,combined_score,Productivity_Label
0,2025-02-01,Saturday,09:17,10:08,studying,3,11.000,27.000,7,71.050,58,0.25,1.75,17.7625,9,5,39.0250,Mid
1,2025-02-01,Saturday,12:51,13:32,writing,1,8.000,26.000,3,76.470,58,0.25,0.75,19.1175,12,5,39.7350,Mid
2,2025-02-02,Sunday,16:12,17:14,coding,1,9.000,19.000,7,67.860,57,0.25,1.75,16.9650,16,6,37.4300,Mid
3,2025-02-02,Sunday,15:20,16:37,reading,0,36.000,40.000,1,52.630,57,0.25,0.25,13.1575,15,6,26.8150,Low
4,2025-02-02,Sunday,12:20,13:04,others,1,24.000,11.000,4,31.430,57,0.25,1.00,7.8575,12,6,17.7150,Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,2025-03-30,Sunday,12:28:22,13:13:22,Writing,21,5.223,25.000,3,82.716,1,1.00,3.00,82.7160,12,6,42.8580,Mid
1436,2025-03-31,Monday,09:38:59,10:28:59,Coding,11,6.115,43.885,5,87.768,0,1.00,5.00,87.7680,9,0,46.3840,Mid
1437,2025-03-31,Monday,14:08:50,15:58:50,Coding,18,2.369,50.000,6,95.475,0,1.00,6.00,95.4750,14,0,50.7375,High
1438,2025-03-31,Monday,07:23:55,07:53:55,Coding,10,8.031,21.969,2,73.228,0,1.00,2.00,73.2280,7,0,37.6140,Mid


In [21]:
# Inputs and target for the best-day classifier.
# NOTE(review): 'Weighted Focus Score' and 'Weighted Productivity' are built
# from the same two columns that 'combined_score' (and so the label) is
# derived from - check whether this leaks the target into the features.
features = [
    'Day_encoded',
    'App Switch Count',
    'Distraction Duration (mins)',
    'Total Focus Duration (mins)',
    'Weighted Focus Score',
    'Weighted Productivity',
]
X_day = df.loc[:, features]
y_day = df.loc[:, 'Productivity_Label']


In [35]:
# Train the best-day classifier on the day features.
# random_state pins the weight initialisation and batch shuffling so that
# Restart & Run All produces the same model (and the same pickle) every time.
model_day = MLPClassifier(max_iter=1000, random_state=42)
model_day.fit(X_day, y_day)

In [39]:
# Persist the fitted best-day classifier next to the notebook.
with open("Best_Day.pkl", "wb") as model_file:
    pickle.dump(model_day, model_file)

In [14]:
def predict_best_days():
    """Print the three weekdays with the largest share of 'High' predictions.

    Uses the notebook-level ``model_day`` classifier and its ``X_day``
    feature matrix, stores the predictions in ``df['Predicted_Productivity']``
    (``df`` is modified in place), then ranks each ``Day`` by the fraction of
    its sessions predicted as 'High'.

    Returns
    -------
    None
        The ranking is printed, not returned.
    """
    # Predict with the model and features defined in this notebook; the
    # previous `models_day["MLP"]` / `X` names are not defined anywhere here.
    df['Predicted_Productivity'] = model_day.predict(X_day)

    # Per-day share of each predicted label (each row sums to 1).
    day_stats = (
        df.groupby('Day')['Predicted_Productivity']
        .value_counts(normalize=True)
        .unstack()
        .fillna(0)
    )

    # If no session at all was predicted 'High' the column would be missing
    # and the sort below would raise KeyError; treat that as a 0% share.
    if 'High' not in day_stats.columns:
        day_stats['High'] = 0.0

    # Sort by percentage of 'High' sessions
    top_days = day_stats.sort_values(by='High', ascending=False)

    print("🔝 Best Days Ranked by High Productivity %:\n")
    print(top_days.index[:3].tolist())

# BEST TIME

In [27]:
# Separate copy of the labelled sessions for the best-time model.
dft = df.copy(deep=True)
dft

Unnamed: 0,Date,Day,Start Time,End Time,Task Type,App Switch Count,Distraction Duration (mins),Total Focus Duration (mins),Focus Score (0-10),Productivity %,Days Ago,Weight,Weighted Focus Score,Weighted Productivity,Hour,Day_encoded,combined_score,Productivity_Label
0,2025-02-01,Saturday,09:17,10:08,studying,3,11.000,27.000,7,71.050,58,0.25,1.75,17.7625,9,5,39.0250,Mid
1,2025-02-01,Saturday,12:51,13:32,writing,1,8.000,26.000,3,76.470,58,0.25,0.75,19.1175,12,5,39.7350,Mid
2,2025-02-02,Sunday,16:12,17:14,coding,1,9.000,19.000,7,67.860,57,0.25,1.75,16.9650,16,6,37.4300,Mid
3,2025-02-02,Sunday,15:20,16:37,reading,0,36.000,40.000,1,52.630,57,0.25,0.25,13.1575,15,6,26.8150,Low
4,2025-02-02,Sunday,12:20,13:04,others,1,24.000,11.000,4,31.430,57,0.25,1.00,7.8575,12,6,17.7150,Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,2025-03-30,Sunday,12:28:22,13:13:22,Writing,21,5.223,25.000,3,82.716,1,1.00,3.00,82.7160,12,6,42.8580,Mid
1436,2025-03-31,Monday,09:38:59,10:28:59,Coding,11,6.115,43.885,5,87.768,0,1.00,5.00,87.7680,9,0,46.3840,Mid
1437,2025-03-31,Monday,14:08:50,15:58:50,Coding,18,2.369,50.000,6,95.475,0,1.00,6.00,95.4750,14,0,50.7375,High
1438,2025-03-31,Monday,07:23:55,07:53:55,Coding,10,8.031,21.969,2,73.228,0,1.00,2.00,73.2280,7,0,37.6140,Mid


In [29]:
# Inputs and target for the best-time classifier: same as the day model but
# with 'Hour' in place of 'Day_encoded'. This rebinds the `features` name
# that the best-day cell also assigns.
features = [
    'App Switch Count',
    'Hour',
    'Distraction Duration (mins)',
    'Total Focus Duration (mins)',
    'Weighted Focus Score',
    'Weighted Productivity',
]
X_time = dft.loc[:, features]
y_time = dft.loc[:, 'Productivity_Label']

In [37]:
# Train the best-time classifier on the hour features.
# random_state pins the weight initialisation and batch shuffling so that
# Restart & Run All produces the same model (and the same pickle) every time.
model_Time = MLPClassifier(max_iter=1000, random_state=42)
model_Time.fit(X_time, y_time)

In [34]:
def Predict_best_time():
    """Print the five start hours with the largest share of 'High' predictions.

    Uses the notebook-level ``model_Time`` classifier and its ``X_time``
    feature matrix, stores the predictions in ``dft['Predicted_Productivity']``
    (``dft`` is modified in place), then ranks each ``Hour`` by the fraction
    of its sessions predicted as 'High'.

    Returns
    -------
    None
        The ranking is printed, not returned.
    """
    # Predict on, and group, the SAME frame the features were taken from.
    # The earlier version wrote into df3 but grouped df2 (two unrelated
    # per-student frames) and used the undefined `models_time` / `X2`.
    dft['Predicted_Productivity'] = model_Time.predict(X_time)

    # Per-hour share of each predicted label (each row sums to 1).
    time_stats = (
        dft.groupby('Hour')['Predicted_Productivity']
        .value_counts(normalize=True)
        .unstack()
        .fillna(0)
    )

    # If no session at all was predicted 'High' the column would be missing
    # and the sort below would raise KeyError; treat that as a 0% share.
    if 'High' not in time_stats.columns:
        time_stats['High'] = 0.0

    # Sort by percentage of 'High' sessions
    top_hours = time_stats.sort_values(by='High', ascending=False)

    # The header used to say "Days" although the ranking is by hour.
    print("🔝 Best Hours Ranked by High Productivity %:\n")
    print(top_hours.index[:5].tolist())

In [41]:
# Persist the fitted best-time classifier next to the notebook.
with open("Best_Time.pkl", "wb") as model_file:
    pickle.dump(model_Time, model_file)