In [1]:
import pandas as pd
import random
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LogisticRegression as lr
from sklearn.metrics import accuracy_score , classification_report

In [2]:
# Pin the state of Python's built-in PRNG so every run draws the same values.
random.seed(a=1)

In [3]:
# Number of student rows to generate for the synthetic example.
num_students: int = 100

In [4]:
# Build the synthetic columns one key at a time.
# The order of these statements matters: each random draw advances the
# generator, so reordering them would change the generated values.
data = {}
# Identifiers run from Student_1 up to Student_<num_students>.
data['StudentID'] = [f'Student_{i}' for i in range(1, num_students + 1)]
# Integer exam scores drawn from 50..100 (both ends inclusive).
data['ExamScores'] = [random.randint(50, 100) for _ in range(num_students)]
# Floating-point study hours drawn between 1 and 10.
data['StudyHours'] = [random.uniform(1, 10) for _ in range(num_students)]
# Floating-point previous-performance values drawn between 60 and 100.
data['PreviousPerformance'] = [random.uniform(60, 100) for _ in range(num_students)]
# Floating-point attendance values drawn between 0.7 and 1.0.
data['Attendance'] = [random.uniform(0.7, 1.0) for _ in range(num_students)]

In [5]:
# Turn the dict of equal-length lists into a table: one column per key.
df = pd.DataFrame(data=data)

In [6]:
# Save the generated table to disk; index=False keeps the row index out of the file.
df.to_csv(path_or_buf='student_dataset.csv', index=False)

In [7]:
# Display the generated frame as the cell's output (the rendering below elides the middle rows).
df

Unnamed: 0,StudentID,ExamScores,StudyHours,PreviousPerformance,Attendance
0,Student_1,58,6.973820,92.135421,0.748606
1,Student_2,86,1.971381,86.392956,0.835273
2,Student_3,98,2.473285,77.051975,0.904267
3,Student_4,54,8.559564,89.498050,0.747683
4,Student_5,66,4.334704,65.027333,0.953506
...,...,...,...,...,...
95,Student_96,93,7.255458,93.044800,0.958630
96,Student_97,97,5.057605,91.595128,0.774189
97,Student_98,73,5.715059,67.516884,0.933333
98,Student_99,55,1.276302,91.420485,0.904623


In [8]:
# Feature matrix: the four numeric columns (StudentID is left out).
# NOTE(review): ExamScores is used as a feature and is also the column the
# label below is thresholded from, so the target leaks into X. Consider
# removing it from the feature list (any later predict call must then pass
# the reduced set of features as well).
X = df.loc[:, ['ExamScores', 'StudyHours', 'PreviousPerformance', 'Attendance']]
# Binary target: 1 when the exam score is strictly greater than 70, otherwise 0.
Y = df['ExamScores'].gt(70).astype(int)

In [9]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = tts(
    X,
    Y,
    random_state=1,
    test_size=0.2,
)

In [10]:
# Logistic-regression classifier (`lr` is the alias given to LogisticRegression in the imports).
# All hyper-parameters are left at the library defaults; only random_state is fixed.
# NOTE(review): the features are not scaled before fitting -- confirm the solver
# converges without warnings, and consider scaling or a higher max_iter if it does not.
model = lr(random_state=1)

In [11]:
# Train the classifier on the training portion of the split.
model.fit(X=X_train, y=y_train)

In [12]:
# Predicted class labels (0 or 1) for the held-out rows.
y_pred = model.predict(X=X_test)

In [13]:
# Show the predicted labels for the held-out rows as the cell's output.
y_pred

array([1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1])

In [14]:
# Fraction of held-out rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [15]:
# Print the held-out accuracy.
# NOTE(review): a perfect score is expected here and is not evidence of a good
# model -- ExamScores, the column the label is thresholded from, is also one
# of the input features.
print(accuracy)

1.0


In [17]:
# Score one hand-written student record.
# The model was fitted on a DataFrame with named columns, so the query is
# passed as a one-row DataFrame with the same column names, in the same order.
# A bare nested list makes scikit-learn warn that X has no valid feature names
# and silently relies on positional order.
new_student = pd.DataFrame([{
    'ExamScores': 70,
    'StudyHours': 4.160342,
    'PreviousPerformance': 45.537605,
    'Attendance': 0.759737,
}])
model.predict(new_student)



array([0])