# UAS Data Science - Analisis Dataset Siswa

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Read the semicolon-separated student dataset into a DataFrame, then
# preview its first five rows as the cell output.
df = pd.read_csv("student-mat.csv", delimiter=";")
df.head(5)

## 1. Exploratory Data Analysis (EDA)

In [None]:
# Print the per-column overview (dtypes and non-null counts).
df.info()

# Descriptive statistics for every column, numeric and non-numeric alike.
# Kept as the final expression so the notebook renders the table.
summary_stats = df.describe(include='all')
summary_stats

In [None]:
# Histogram of the G3 column in 15 bins with a KDE curve overlaid.
# Labels are set on the Axes object that histplot returns.
g3_axes = sns.histplot(df['G3'], bins=15, kde=True)
g3_axes.set_title("Distribusi Nilai Akhir G3")
g3_axes.set_xlabel("G3")
g3_axes.set_ylabel("Jumlah")
plt.show()

## 2. Regresi Linear: Studytime vs G3

In [None]:
# Fit a single-feature linear regression of G3 on studytime.
X = df[['studytime']]  # double brackets keep X two-dimensional for scikit-learn
y = df['G3']
reg = LinearRegression()
reg.fit(X, y)

# Report the fitted parameters so the model trained above is actually used.
# The line drawn by regplot below comes from seaborn's own internal fit,
# not from `reg`, so without this output the fit would be discarded.
print(f"Koefisien studytime: {reg.coef_[0]:.3f}")
print(f"Intersep: {reg.intercept_:.3f}")
print(f"R^2: {reg.score(X, y):.3f}")

# Scatter of the raw data with a red regression line and no confidence band.
sns.regplot(x='studytime', y='G3', data=df, ci=None, line_kws={'color': 'red'})
plt.title('Regresi Linear antara Studytime dan G3')
plt.show()

## 3. Clustering: Absences dan Studytime

In [None]:
# Standardize both features before clustering so that the column with the
# larger numeric range does not dominate the distance computation.
scaler = StandardScaler()
scaled = scaler.fit_transform(df[['absences', 'studytime']])

# n_init is pinned explicitly: its default changed between scikit-learn
# releases (10 -> 'auto'), which alters the resulting clusters and triggers
# a FutureWarning on some versions. random_state keeps runs reproducible.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df['cluster'] = kmeans.fit_predict(scaled)

# Plot on the original (unscaled) axes, colored by the assigned cluster label.
sns.scatterplot(data=df, x='absences', y='studytime', hue='cluster', palette='Set1')
plt.title("Clustering Siswa")
plt.show()

## 4. Klasifikasi G3

In [None]:
# Bin G3 into three ordered categories using right-closed intervals:
# (-1, 9] -> low, (9, 14] -> medium, (14, 20] -> high.
df['G3_cat'] = pd.cut(df['G3'], bins=[-1,9,14,20], labels=['low','medium','high'])
X = df[['studytime', 'absences', 'goout']]
y = df['G3_cat']

# Stratify on the target so the train and test sets keep the same class
# proportions; a plain random split can under-represent the smaller classes.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# zero_division=0 reports 0 (instead of warning) for a class the model never
# predicts, which can happen with only three weak features.
print(classification_report(y_test, y_pred, zero_division=0))