# Libraries & Datasets

In [1]:
import pandas as pd
import numpy as np
import noael_eda as eda
from sklearn.linear_model import LogisticRegression
import plotly.express as pex
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [2]:
# pass_fail: read from Pass_Fail.csv; later cells use its 'Student' and 'Passed Class' columns.
# survey: read from Survey.csv; later cells use 'Student' and 'Question 1'..'Question 6'.
pass_fail, survey = (
    pd.read_csv(csv_name) for csv_name in ("Pass_Fail.csv", "Survey.csv")
)

# Matrix Conversion

In [3]:
def gen_score_matrix(student, responses=None):
    """Build a student's score matrix from their survey responses.

    Selects every row whose 'Student' column equals ``student`` and keeps the
    columns labelled "Question 1" through "Question 6" (a label slice, so both
    endpoints are included along with any columns positioned between them).

    Args:
        student (str): Value to match against the 'Student' column.
        responses (pandas.DataFrame, optional): Table to read from. Defaults
            to the notebook-level ``survey`` frame.

    Returns:
        numpy.ndarray: 2-D array with one row per matching survey row
        (presumably one row per exam -- TODO confirm against Survey.csv) and
        one column per selected question column. Has zero rows when no row
        matches ``student``.
    """
    if responses is None:
        # Fall back to the global frame so existing one-argument calls keep working.
        responses = survey
    is_student = responses['Student'] == student
    return responses.loc[is_student, "Question 1":"Question 6"].to_numpy()

# SVD

In [4]:
# Reduced SVD (full_matrices=False) of a single student's score matrix,
# handed to the eda helper for printing.
noael_scores = gen_score_matrix('Noael')
U, S, Vt = np.linalg.svd(noael_scores, full_matrices=False)
eda.svd_print(U, S, Vt)

——	U	——
[[-0.52445955  0.69198174 -0.49608815]
 [-0.44361037 -0.71939659 -0.53448889]
 [-0.72674068 -0.06024796  0.68426469]]
——	S	——
[15.34988426  3.20472265  1.05394774] —— Largest is S[0]
——	*Vᵀ[0]	——
[-0.4600929  -0.44955872 -0.34968095 -0.39702598 -0.4600929  -0.30760301]
——	Vᵀ[1]	——
[ 0.09626336 -0.78454817  0.12361762  0.10481788  0.09626336  0.58282313]
——	Vᵀ[2]	——
[-0.15797351 -0.23084381  0.17061238  0.81985206 -0.15797351 -0.44219215]


In [5]:
# One entry per student: the first three singular values of their score
# matrix (x1, x2, x3) and their pass/fail label (y).
x1, x2, x3, y = [], [], [], []

for student in pass_fail['Student'].unique():
    U, S, Vt = np.linalg.svd(gen_score_matrix(student), full_matrices=False)
    # S[0..2] requires at least three singular values, i.e. at least three
    # survey rows for this student.
    x1.append(S[0])
    x2.append(S[1])
    x3.append(S[2])
    # Store the label as a scalar (first matching row) instead of a whole
    # Series, so y is a flat list with exactly one entry per student even if
    # a student is listed more than once in pass_fail.
    y.append(pass_fail.loc[pass_fail['Student'] == student, 'Passed Class'].iloc[0])

In [6]:
# Feature matrix: one row per student, columns are the three singular values.
x = np.column_stack((x1, x2, x3))
# Flatten the labels to 1-D, the shape passed to fit() below.
y = np.array(y).ravel()

# Single instantiation; the original created the estimator twice and
# discarded the first instance.
model = LogisticRegression()
model.fit(x, y)

print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)

Coefficients: [[1.15530264 0.28849669 0.17566506]]
Intercept: [-18.50859385]


In [7]:
# Table of singular-value features with a readable outcome label per student.
outcome_labels = {0: 'Failed', 1: 'Passed'}
df = pd.DataFrame({'sigma1': x1, 'sigma2': x2, 'sigma3': x3, 'exit': y}).assign(
    exit=lambda frame: frame['exit'].map(outcome_labels)
)

# 3-D scatter of the students, coloured and shaped by outcome.
fig = pex.scatter_3d(
    df,
    x='sigma1',
    y='sigma2',
    z='sigma3',
    color='exit',
    symbol='exit',
    title='Student Survey in SVD Feature Space',
    opacity=0.8,
)
fig.update_traces(marker={'size': 5})
fig.update_layout(
    legend_title_text='Student Outcome',
    scene={'xaxis_title': 'Σ₁', 'yaxis_title': 'Σ₂', 'zaxis_title': 'Σ₃'},
)

# Save a standalone HTML copy (plotly.js loaded from the CDN), then render inline.
fig.write_html("svd_feature_space.html", full_html=True, include_plotlyjs="cdn")
fig.show()

# PCA

In [10]:
# StandardScaler and PCA are already imported in the notebook's first cell,
# so no import is repeated here.

# Standardise the singular-value features before PCA.
# NOTE(review): this rebinds `x`; from here on `x` holds the scaled features,
# not the raw singular values the logistic regression was fitted on.
x = StandardScaler().fit_transform(x)

# `x` is built from three feature columns and three components are kept, so
# this re-expresses the data on new axes without dropping any dimension
# (the total explained variance printed below is therefore 1.0).
pca = PCA(n_components=3)
x_pca = pca.fit_transform(x)

print("Explained Variance Ratio:", pca.explained_variance_ratio_)
print("Total Explained Variance:", pca.explained_variance_ratio_.sum())

Explained Variance Ratio: [0.62044986 0.21639265 0.16315749]
Total Explained Variance: 1.0


In [11]:
# Principal-component scores per student, tagged with the outcome labels
# taken from the SVD feature table `df`.
pc_names = ['PC1', 'PC2', 'PC3']
pca_df = pd.DataFrame(x_pca, columns=pc_names).assign(exit=df['exit'])

# Rows are the principal axes expressed in the standardised feature space.
print(pca.components_)

[[-0.58031857  0.54379258  0.60623427]
 [ 0.55119444  0.8102569  -0.19916941]
 [ 0.59951235 -0.21857125  0.76994257]]


In [12]:
# 3-D scatter of the students in principal-component space, coloured and
# shaped by outcome.
fig = pex.scatter_3d(
    pca_df,
    x='PC1',
    y='PC2',
    z='PC3',
    color='exit',
    symbol='exit',
    title='Student Survey in PCA Feature Space',
    opacity=0.8,
)
fig.update_layout(
    legend_title_text='Student Outcome',
    scene={'xaxis_title': 'PC 1', 'yaxis_title': 'PC 2', 'zaxis_title': 'PC 3'},
)
fig.update_traces(marker={'size': 5})

fig.show()

## What about a 4th dimension?