In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

# Load the two source datasets. The relative paths are resolved against the
# current working directory (not the script location), so run this from the
# folder that contains 'Productivity_Test.csv' and 'student_productivity.csv'.
data1 = pd.read_csv("Productivity_Test.csv")
data2 = pd.read_csv("student_productivity.csv")

# Process data1: keep the leading integer of each 'hour_slot' range string
# (the part before the first '-') and make the rating numeric.
data1['hour_slot'] = data1['hour_slot'].apply(lambda x: int(x.split('-')[0]))
data1['productivity_rating'] = data1['productivity_rating'].astype(float)

# Process data2: encode weekday names as 0 (Monday) .. 6 (Sunday). Names that
# are not in the mapping become NaN.
data2['day_of_week'] = data2['day_of_week'].map({
    'Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 'Thursday': 3,
    'Friday': 4, 'Saturday': 5, 'Sunday': 6
})
# Keep only the hour component of the time string (the text before the first
# ':'). Stripping the literal ':00' instead would crash on any value with
# non-zero minutes such as '14:30'.
# NOTE(review): assumes 24-hour 'HH:MM[:SS]' strings - confirm against the CSV.
data2['time_of_day'] = data2['time_of_day'].str.split(':').str[0].astype(int)

# Create 4-hour slots for data2
def get_time_slot(hour):
    """Return the index (0-5) of the 4-hour slot that contains *hour*.

    Slots are [0, 4) -> 0, [4, 8) -> 1, [8, 12) -> 2, [12, 16) -> 3 and
    [16, 20) -> 4. Every other value (20 and above, negatives, NaN) falls
    into the last slot, 5.
    """
    last_slot = 5
    # Values below the first slot are lumped into the catch-all slot.
    if hour < 0:
        return last_slot
    # Exclusive upper bound of slots 0..4, in ascending order; the first bound
    # that exceeds `hour` identifies the slot.
    for slot, upper_bound in enumerate((4, 8, 12, 16, 20)):
        if hour < upper_bound:
            return slot
    return last_slot

# Bucket each data2 hour into its 4-hour slot index (0-5).
data2['time_slot'] = data2['time_of_day'].apply(get_time_slot)

# Stack the two datasets row-wise. The selected columns do not overlap, so
# every row is NaN in the columns that belong to the other dataset.
data_combined = pd.concat([
    data1[['hour_slot', 'productivity_rating']],
    data2[['day_of_week', 'time_slot', 'productivity']]
], axis=0, ignore_index=True)

# Single target column: data1's rating where present, otherwise data2's
# productivity value.
data_combined['productivity'] = data_combined['productivity_rating'].fillna(data_combined['productivity'])

# Feature matrix. Because each row lacks the other dataset's columns, the raw
# selection always contains NaN; MinMaxScaler passes NaN through unchanged and
# a single NaN input turns the LSTM loss into NaN. Replace the gaps with -1 as
# an explicit "not recorded" marker (only in X - data_combined keeps its NaNs).
# NOTE(review): assumes genuine feature values are non-negative - confirm.
X = data_combined[['hour_slot', 'day_of_week', 'time_slot']].fillna(-1)
y = data_combined['productivity']

# Build fixed-length windows for the LSTM: each sample is `sequence_length`
# consecutive feature rows and its target is the productivity of the row
# immediately after the window.
sequence_length = 6  # Example sequence length

# Chronological 80/20 split over the windows (no shuffling).
n_windows = len(X) - sequence_length
split = int(0.8 * n_windows)
if split < 1:
    raise ValueError(
        f"Need at least {sequence_length + 2} rows to build a training window, "
        f"got {len(X)}"
    )

# Normalize the features. The scaler is fitted only on the rows covered by the
# training windows (window i spans rows i .. i + sequence_length - 1) and then
# applied to every row, so the test rows do not leak into the scaling range.
n_train_rows = split + sequence_length - 1
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X.iloc[:n_train_rows])
X_scaled = scaler.transform(X)

# Reshape input for LSTM: (n_windows, sequence_length, n_features).
X_lstm = np.array([
    X_scaled[start:start + sequence_length] for start in range(n_windows)
])
# Target of window i is row i + sequence_length, i.e. y without its first
# `sequence_length` entries.
y_lstm = y.iloc[sequence_length:].to_numpy()

# Split the data into training and testing sets
X_train, X_test = X_lstm[:split], X_lstm[split:]
y_train, y_test = y_lstm[:split], y_lstm[split:]

# Network: two stacked LSTM layers (64 units emitting the full sequence, then
# 32 units emitting only the final state) followed by a single linear output
# unit. Each input sample has shape (sequence_length, number of feature columns).
model = Sequential([
    LSTM(
        units=64,
        return_sequences=True,
        input_shape=(sequence_length, X.shape[1]),
    ),
    LSTM(units=32),
    Dense(1),
])

# Mean-squared-error regression objective, optimized with Adam.
model.compile(
    loss='mean_squared_error',
    optimizer=Adam(learning_rate=0.001),
)

# Fit on the training windows only.
model.fit(X_train, y_train, batch_size=32, epochs=10)

# Evaluate the model on the held-out windows. The network ends in Dense(1), so
# predict() returns shape (n, 1) while y_test is 1-D with shape (n,). Flatten
# the predictions before subtracting: otherwise NumPy broadcasts the operands
# to an (n, n) matrix and the "MSE" compares every prediction with every target.
y_pred = model.predict(X_test)
mse = np.mean(np.square(y_pred.ravel() - y_test))
print(f'Mean Squared Error: {mse}')

# Predict productivity for the entire dataset
predicted_productivity = model.predict(X_lstm)

# Prepare the results for plotting. The first `sequence_length` rows have no
# complete window in front of them, so they stay NaN. Assign through a single
# .loc call on the frame; chained `df[col].iloc[...] = ...` may write to a
# temporary copy and is a no-op under pandas copy-on-write.
data_combined['predicted_productivity'] = np.nan
data_combined.loc[
    data_combined.index[sequence_length:], 'predicted_productivity'
] = predicted_productivity.ravel()

# Plot the productivity graph: one line per weekday showing the mean predicted
# productivity in each 4-hour slot.
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
slot_labels = ['0-4am', '4-8am', '8-12pm', '12-4pm', '4-8pm', '8pm-12am']
# Slots are drawn at their starting hour (slot index * 4), so the ticks must
# sit at 0, 4, ..., 20 rather than at the slot indices 0..5.
slot_start_hours = [4 * slot for slot in range(len(slot_labels))]

plt.figure(figsize=(14, 7))
for day, day_name in enumerate(day_names):
    day_data = data_combined[data_combined['day_of_week'] == day]
    # Several rows can share a (day, slot) pair; average them so each line has
    # one point per slot in ascending slot order instead of zig-zagging through
    # the raw rows. NaN predictions are ignored by mean().
    slot_means = day_data.groupby('time_slot')['predicted_productivity'].mean()
    plt.plot(slot_means.index * 4, slot_means.values, marker='o', label=day_name)

plt.xticks(ticks=slot_start_hours, labels=slot_labels)
plt.xlabel('Time of Day')
plt.ylabel('Predicted Productivity')
plt.title('Predicted Productivity Throughout the Day (LSTM Model)')
plt.legend()
plt.grid(True)
plt.show()
