Import required libraries  ------------------------------------------------------------->>>

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.metrics import Accuracy

Load the dataset of student academic performance metrics  ----------------------------------------------------------->>>

In [None]:
# Read the raw student-performance table into `df`.
# The path is relative, so 'student_performance.csv' is assumed to sit in the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer='student_performance.csv')

Clean the data (if needed) by handling missing values, outliers, etc. ----------------------------------->>>

In [None]:
# Checking for missing values
# Per-column count of null entries, printed before anything is changed.
print("Missing Values:\n", df.isnull().sum())

# Handling missing values
# The two options below are alternatives. They are run back to back here as a
# template; once Option 1 has dropped every row containing a null, Option 2
# has nothing left to fill.

# Option 1: Drop missing values
df = df.dropna()

# Option 2: Fill missing values with mean, median or mode
# Example: Fill missing values in 'column_name' with mean
# Assign the filled column back instead of calling fillna(inplace=True) on the
# selected column: the chained in-place form operates on an intermediate object,
# is deprecated in recent pandas, and is not guaranteed to update `df`.
df['column_name'] = df['column_name'].fillna(df['column_name'].mean())

Clean the data (if needed): detect outliers using the IQR (interquartile range) method  -------------------------------------->>>

In [None]:
# Checking for outliers
# Option 1: Using IQR (Interquartile Range)
# The fences sit 1.5 * IQR below the first quartile and above the third.
Q1, Q3 = df['column_name'].quantile(0.25), df['column_name'].quantile(0.75)
IQR = Q3 - Q1
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

# A row is reported when its value lies strictly outside the fences.
outliers = df[df['column_name'].lt(lower_bound) | df['column_name'].gt(upper_bound)]
print("Outliers:\n", outliers)


Perform data cleaning operations as per the specific requirements of the dataset.  ------------------------------------->>>

In [None]:
# Option 2: Using Z-score
from scipy.stats import zscore

# Standardise the column and report rows whose |z| exceeds 3.
z_scores = zscore(df['column_name'])
outliers = df[np.abs(z_scores) > 3]
print("Outliers:\n", outliers)

# Handling outliers
# Option 1: Remove outliers
# Keep values that lie exactly on the IQR bounds: the IQR detection step only
# flags values strictly outside [lower_bound, upper_bound], so the bounds
# themselves are not outliers and must not be dropped.
# .copy() makes the filtered frame independent of the original, so the column
# assignments below do not raise pandas' SettingWithCopyWarning.
df = df[df['column_name'].between(lower_bound, upper_bound)].copy()

# Option 2: Cap or floor outliers
# clip() floors values below lower_bound and caps values above upper_bound in
# one step. (After Option 1 has run there is nothing left to clip; the two
# options are alternatives shown together as a template.)
df['column_name'] = df['column_name'].clip(lower=lower_bound, upper=upper_bound)

# Other data cleaning operations
# Example: Remove unnecessary columns
df = df.drop(['unnecessary_column1', 'unnecessary_column2'], axis=1)

# Example: Convert data types
df['column_name'] = df['column_name'].astype(int)

# Example: Renaming columns
df = df.rename(columns={'old_column_name': 'new_column_name'})

# Example: Fixing inconsistent values
df['column_name'] = df['column_name'].replace('incorrect_value', 'correct_value')

# Example: Removing duplicates
df = df.drop_duplicates()

# Example: Reordering columns
# NOTE: selecting a column list also drops every column that is not listed.
df = df[['column_name1', 'column_name2', 'column_name3']]

# Saving cleaned dataset
# index=False keeps the row index out of the written file.
df.to_csv('cleaned_dataset.csv', index=False)


Evaluate academic performance of students ---------------------------------------------------->>>

In [None]:
# Assign values to variables
# NOTE: the numbers below are illustrative placeholders for ONE student so that
# this cell is valid Python and the rule cells that follow can run. Replace
# them with the real record being evaluated.
a = 72.0  # 1-1 semester percentage
b = 68.5  # 1-2 semester percentage
c = 75.0  # 2-1 semester percentage
d = 70.0  # 2-2 semester percentage
e = 66.5  # 3-1 semester percentage
f = 71.0  # 3-2 semester percentage
g = 85.0  # Attendance percentage
# h, i and j are only ever tested for truthiness by the placement rule,
# so any truthy value counts as "has it" and any falsy value as "does not".
h = 1  # extracurricular activities
i = 0  # Academic awards and achievements
j = 1  # Coding skills
k = [a, b, c, d, e, f]  # semester grades, in chronological order


In [None]:
# Calculate dropout
# Flag is 1 only when BOTH hold: the lowest semester grade in `k` is under 35
# and `g` is under 30; otherwise 0.
if min(k) < 35 and g < 30:
    dropout = 1
else:
    dropout = 0

In [None]:
# Calculate good performance
# 1 when every semester grade in `k` is strictly above 60, else 0.
good_performance = int(all(grade > 60 for grade in k))

In [None]:
# Calculate poor performance
# 1 when even the best semester grade in `k` is below 40, else 0.
if max(k) < 40:
    poor_performance = 1
else:
    poor_performance = 0

In [None]:
# Calculate support required
# 1 when at least one semester grade in `k` falls in the half-open band
# [40, 60), else 0.
support_required = int(any(40 <= grade < 60 for grade in k))

In [None]:
# Calculate eligibility for placement
# Requires every semester grade in `k` to be strictly above 65 AND at least
# one of j, i, h to be truthy.
if all(grade > 65 for grade in k) and (j or i or h):
    eligible_for_placement = 1
else:
    eligible_for_placement = 0

In [None]:
# Print the results
# One line per category flag, in the order the flags were computed.
results = (
    ("Dropout: ", dropout),
    ("Good Performance: ", good_performance),
    ("Poor Performance: ", poor_performance),
    ("Support Required: ", support_required),
    ("Eligible for Placement: ", eligible_for_placement),
)
for label, flag in results:
    print(label, flag)

Display the output of academic performance categories  ---------------------------------------------------------->>>

In [None]:
# Sum each category column of `df` and print the total next to its heading.
# The five columns must already exist in `df` when this cell runs.
category_headings = {
    'dropout': "Number of dropouts: ",
    'good_performance': "Number of good performers: ",
    'poor_performance': "Number of poor performers: ",
    'support_required': "Number of students requiring support: ",
    'eligible_for_placement': "Number of students eligible for placement: ",
}
for column, heading in category_headings.items():
    print(heading, df[column].sum())


Visualize critical values as graphs across all students  ---------------------------------------->>>

In [None]:
# Visualize critical values three ways, in this order: boxplot, violinplot,
# swarmplot. Each plot gets its own 8x6 figure with identical titles and axis
# labels, and is shown before the next one is drawn.
for draw_plot in (sns.boxplot, sns.violinplot, sns.swarmplot):
    plt.figure(figsize=(8, 6))
    draw_plot(x='student_id', y='critical_value', data=df)
    plt.title('Critical Values Distribution by Student')
    plt.xlabel('Student ID')
    plt.ylabel('Critical Value')
    plt.show()



Create appropriate plots to visualize the distribution of performance metrics ------------------------>>>

In [None]:
# Example: Visualize performance metrics using a histogram with a KDE overlay.
# The metric is passed explicitly as `x`, so it runs along the horizontal axis
# and the bar heights (counts) are on the vertical axis.
plt.figure(figsize=(8, 6))
sns.histplot(x=df['performance_metric'], bins=10, kde=True)
plt.title('Distribution of Performance Metrics')
plt.xlabel('Performance Metric')
plt.ylabel('Frequency')
plt.show()

# Example: Visualize performance metrics using a boxplot
# Pass the series by keyword: a bare positional argument is read as `x` by
# older seaborn releases and as `data` by newer ones, so the orientation would
# depend on the installed version. `y=` puts the values on the vertical axis,
# matching the 'Value' y-label below.
plt.figure(figsize=(8, 6))
sns.boxplot(y=df['performance_metric'])
plt.title('Distribution of Performance Metrics')
plt.xlabel('Performance Metric')
plt.ylabel('Value')
plt.show()

# Example: Visualize performance metrics using a violinplot
# Same explicit `y=` as the boxplot, for the same reason.
plt.figure(figsize=(8, 6))
sns.violinplot(y=df['performance_metric'])
plt.title('Distribution of Performance Metrics')
plt.xlabel('Performance Metric')
plt.ylabel('Value')
plt.show()


Prepare data for LSTM model  -------------------------------------------->>>

In [None]:
# Input features - semester percentages
feature_columns = ['a', 'b', 'c', 'd', 'e', 'f']
# Target variables - performance categories
target_columns = ['good_performance', 'poor_performance', 'support_required', 'dropout']

# Pull both column groups out of `df` as plain NumPy arrays (rows = students).
X = df[feature_columns].values
y = df[target_columns].values


Split the data into training and testing sets  ----------------------------------------------->>>

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

Reshape input data into the required format for LSTM modeling  ---------------------------------->>>

In [None]:
# Each column of X becomes one timestep carrying a single feature, turning the
# 2-D (samples, columns) arrays into 3-D (samples, timesteps, features) arrays.
n_timesteps, n_features = X.shape[1], 1
X_train = X_train.reshape((len(X_train), n_timesteps, n_features))
X_test = X_test.reshape((len(X_test), n_timesteps, n_features))


Build and compile the LSTM model  ---------------------------------------------------->>>

In [None]:
# Build the network: one LSTM layer that reads the sequence of semester
# percentages, followed by a dense output layer.
# One output unit per target column, so predictions have the same shape as
# y_train / y_test. Without this layer the model would emit the 64 LSTM
# activations and training would fail on the shape mismatch with the targets.
n_outputs = y_train.shape[1]

model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(n_timesteps, n_features)))
# Sigmoid gives each output an independent value in (0, 1), which is what the
# binary cross-entropy loss used at compile time expects.
# Assumes every target column holds 0/1 flags - TODO confirm against the data.
model.add(Dense(n_outputs, activation='sigmoid'))

# Show the layer stack and parameter counts.
model.summary()

In [None]:
# Compile the model
# Adam optimizer, binary cross-entropy loss, and accuracy tracked as the only
# extra metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the model
# 10 passes over the training data in mini-batches of 64. The test split is
# also passed as validation data, so it is scored after every epoch.
model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(X_test, y_test),
)

In [None]:
# Evaluate the model
# Score the trained model on the test split without progress output
# (verbose=0). The first two entries of the result are reported as the loss
# and the accuracy, in that order.
scores = model.evaluate(X_test, y_test, verbose=0)
for position, label in enumerate(('Test loss:', 'Test accuracy:')):
    print(label, scores[position])

X--------------------------------------------------------- END OF CODE ---------------------------------------------------X