In [128]:
import json
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.special import softmax as scipy_softmax

In [130]:
df = pd.read_csv('../datasets/SEHIR/processed_dataset.csv')
df

Unnamed: 0,Course Code,Course Title,Student Number,Department Code,Course Level,Letter Grade,Status,GPA,Standing,Completed Credits,Completed ECTS,GPA Student - Subject,Avg. Grade - Taken,Avg. Grade - Students_Subject,Semester,Theoritical,Practical,Course Credit,ECTS,Course Year
0,UNI 111,Critical Reading & Writing in Turkish I,240,SOC,Undergraduate,F,Unsuccessful,2.62,Freshman,18,30,2.616667,2.113636,2.703226,2011 - Fall,3,0,3,5,1
1,UNI 107,World Civilizations& Global Encounters I,338,PSY,Undergraduate,A,Successful,3.68,Freshman,18,30,3.683333,2.986364,2.703226,2011 - Fall,3,0,3,5,1
2,UNI 105,Understanding Society and Culture I,338,PSY,Undergraduate,A,Successful,3.68,Freshman,18,30,3.683333,3.211538,2.703226,2011 - Fall,3,0,3,5,1
3,UNI 103,Understanding Science and Technology,338,PSY,Undergraduate,A,Successful,3.68,Freshman,18,30,3.683333,3.176000,2.703226,2011 - Fall,3,0,3,5,1
4,UNI 105,Understanding Society and Culture I,240,SOC,Undergraduate,A,Successful,2.62,Freshman,18,30,2.616667,3.211538,2.703226,2011 - Fall,3,0,3,5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48736,MGT 531,Stratejik Yönetim,1102,MBA/NT,Graduate,A-,Successful,3.80,Freshman,24,40,3.800000,3.391758,3.049526,2014 - Spring,3,0,3,5,5
48737,MGT 585,Operasyon Yönetimi,1102,MBA/NT,Graduate,A,Successful,3.80,Freshman,24,40,3.800000,3.440476,3.049526,2014 - Spring,3,0,3,5,5
48738,MGT 552,İnsan Kaynakları Yönetimi,1102,MBA/NT,Graduate,A,Successful,3.80,Freshman,24,40,3.800000,3.393077,3.049526,2014 - Spring,3,0,3,5,5
48739,MGT 574,Makroekonomik Göster. ve Pol. Anal.,1984,MBA/NT,Graduate,A-,Successful,3.20,Sophomore,24,40,3.200000,3.570769,3.049526,2014 - Spring,3,0,3,5,5


In [132]:
df_raw = df.copy()

In [134]:
df.drop([df.columns[0], df.columns[1], df.columns[2]], inplace=True, axis=1)   # dropping course details

In [136]:
df = pd.concat([df, pd.get_dummies(df['Course Year'], prefix='Course Year'), pd.get_dummies(df['Department Code'], prefix='Department Code'), pd.get_dummies(df['Course Level'], prefix='Course Level'), pd.get_dummies(df['Standing'], prefix='Standing'), pd.get_dummies(df['Status'], prefix='Status')], axis=1)
df.drop(['Course Year', 'Department Code', 'Course Level', 'Status', 'Standing'], axis=1, inplace=True)

In [138]:
columns = df.columns

In [140]:
le = LabelEncoder()
le.fit(['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-', 'F'])

In [142]:
def standardize(X_train, X_test):
    X_train_cols = X_train.columns
    X_test_cols = X_test.columns
    sc = StandardScaler()
    fitted_sc = sc.fit(X_train)
    X_train_std = pd.DataFrame(fitted_sc.transform(X_train), columns=X_train_cols)
    X_test_std = pd.DataFrame(fitted_sc.transform(X_test), columns=X_test_cols)
    return X_train_std, X_test_std

In [144]:
def get_train_data(df, train_sem, columns):
    dataFrame = pd.DataFrame(columns=columns)
    for sem in train_sem:
        dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)
    
    X_train = dataFrame.drop('Semester', axis=1)
    y_train = le.transform(X_train.pop('Letter Grade'))
    return X_train, y_train

In [146]:
def define_structure(X, Y):
    input_unit = X.shape[0] # size of input layer
    hidden_unit = X.shape[0] # hidden layer of size 4
    output_unit = Y.shape[0] # size of output layer
    return (input_unit, hidden_unit, output_unit)

In [148]:
def parameters_initialization(input_unit, hidden_unit, output_unit):
    np.random.seed(41)
    W1 = np.random.randn(hidden_unit, input_unit) * 0.01
    b1 = np.zeros((hidden_unit, 1))
    W2 = np.random.randn(output_unit, hidden_unit) * 0.01
    b2 = np.zeros((output_unit, 1))
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    
    return parameters

In [150]:
def softmax(x):
    return scipy_softmax(x)

In [152]:
def forward_propagation(X, parameters):
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = softmax(Z2)
    cache = {"Z1": Z1,"A1": A1,"Z2": Z2,"A2": A2}
    
    return A2, cache

In [154]:
def entropy_cost(A2, Y, parameters):
    #number of training example
    m = Y.shape[1]
    logprobs = np.multiply(np.log(A2), Y)
    cost = - np.sum(logprobs) / m
    cost = float(np.squeeze(cost))
                                    
    return cost

In [156]:
def backward_propagation(parameters, cache, X, Y):
    #number of training example
    m = X.shape[1]
    
    W1 = parameters['W1']
    W2 = parameters['W2']
    A1 = cache['A1']
    A2 = cache['A2']
   
    dZ2 = A2-Y
    dW2 = (1/m) * np.dot(dZ2, A1.T)
    db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))
    dW1 = (1/m) * np.dot(dZ1, X.T) 
    db1 = (1/m)*np.sum(dZ1, axis=1, keepdims=True)
    
    grads = {"dW1": dW1, "db1": db1, "dW2": dW2,"db2": db2}
    
    return grads

In [158]:
def gradient_descent(parameters, grads, learning_rate = 0.01):
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
   
    dW1 = grads['dW1']
    db1 = grads['db1']
    dW2 = grads['dW2']
    db2 = grads['db2']
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    
    parameters = {"W1": W1, "b1": b1,"W2": W2,"b2": b2}
    
    return parameters

In [172]:
def neural_network_model(X, Y, hidden_unit, num_iterations = 1000):
    np.random.seed(3)
    input_unit = define_structure(X, Y)[0]
    output_unit = define_structure(X, Y)[2]
    
    parameters = parameters_initialization(input_unit, hidden_unit, output_unit)
   
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    
    for i in range(0, num_iterations):
        A2, cache = forward_propagation(X, parameters)
        cost = entropy_cost(A2, Y, parameters)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = gradient_descent(parameters, grads)
        if i % 5 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
    return parameters

In [162]:
def prediction(parameters, X):
    A2, cache = forward_propagation(X, parameters)
    print(A2)
    predictions = np.round(A2)
    print(predictions)
    
    return predictions

In [174]:
def get_error_score(df, columns):
    error_scores = {}
    sorted_semesters = sorted(set(df.iloc[:, 7]))
    for sem_idx in range(1, len(sorted_semesters)):
        training_sem = sorted_semesters[:sem_idx]
        test_sem = sorted_semesters[sem_idx]
        X_train, y_train = get_train_data(df, training_sem, columns)
        X_test = df[df.iloc[:, 7] == test_sem]
        X_test.drop('Semester', axis=1, inplace=True)
        y_test = le.transform(X_test.pop('Letter Grade'))
        
        X_train, X_test = standardize(X_train, X_test)
        
        X_train = X_train.T.to_numpy()   # (number of attributes, number of samples)
        y_train = y_train.reshape(1, y_train.shape[0])   # (1, number of samples)
        
        X_test = X_test.T.to_numpy()   # (number of attributes, number of samples)
        y_test = y_test.reshape(1, y_test.shape[0])   # (1, number of samples)
        
        parameters = neural_network_model(X_train, y_train, 58, num_iterations=5000)
        
        y_pred_test = prediction(parameters, X_test)
        rmse_test = round(np.sqrt(mean_squared_error(y_test[0], y_pred_test[0])), 3)
        mae_test = round(mean_absolute_error(y_test[0], y_pred_test[0]), 3)
        print(y_test)
        print("-------------------------------------")
        
        y_pred_train = prediction(parameters, X_train)
        rmse_train = round(np.sqrt(mean_squared_error(y_train[0], y_pred_train[0])), 3)
        mae_train = round(mean_absolute_error(y_train[0], y_pred_train[0]),3)
        
        error_scores.setdefault(sem_idx, {})
        error_scores[sem_idx]['rmse'] = [rmse_train, rmse_test]
        error_scores[sem_idx]['mae'] = [mae_train, mae_test]        
        
    return error_scores

In [176]:
model_results = {}

In [None]:
scores = get_error_score(df, columns)
model_results['NN'] = scores

  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test.drop('Semester', axis=1, inplace=True)


Cost after iteration 0: 33.743522
Cost after iteration 5: 33.719093
Cost after iteration 10: 33.681952
Cost after iteration 15: 33.614907
Cost after iteration 20: 33.491912
Cost after iteration 25: 33.285205
Cost after iteration 30: 33.021617
Cost after iteration 35: 32.976266
Cost after iteration 40: 33.985790
Cost after iteration 45: 37.156127
Cost after iteration 50: 42.649274
Cost after iteration 55: 49.719905
Cost after iteration 60: 57.628961
Cost after iteration 65: 65.843473
Cost after iteration 70: 73.941751
Cost after iteration 75: 81.529838
Cost after iteration 80: 88.149611
Cost after iteration 85: 93.208623
Cost after iteration 90: 96.075862
Cost after iteration 95: 96.407363
Cost after iteration 100: 94.476074
Cost after iteration 105: 91.057991
Cost after iteration 110: 86.853672
Cost after iteration 115: 82.349323
Cost after iteration 120: 77.933529
Cost after iteration 125: 73.875085
Cost after iteration 130: 70.295769
Cost after iteration 135: 67.166252
Cost after ite

  logprobs = np.multiply(np.log(A2), Y)
  logprobs = np.multiply(np.log(A2), Y)


Cost after iteration 305: nan
Cost after iteration 310: nan
Cost after iteration 315: nan
Cost after iteration 320: nan
Cost after iteration 325: nan
Cost after iteration 330: nan
Cost after iteration 335: nan
Cost after iteration 340: nan
Cost after iteration 345: nan
Cost after iteration 350: nan
Cost after iteration 355: nan
Cost after iteration 360: nan
Cost after iteration 365: nan
Cost after iteration 370: nan
Cost after iteration 375: nan
Cost after iteration 380: nan
Cost after iteration 385: nan
Cost after iteration 390: nan
Cost after iteration 395: nan
Cost after iteration 400: nan
Cost after iteration 405: nan
Cost after iteration 410: nan
Cost after iteration 415: nan
Cost after iteration 420: nan
Cost after iteration 425: nan
Cost after iteration 430: nan
Cost after iteration 435: nan
Cost after iteration 440: nan
Cost after iteration 445: nan
Cost after iteration 450: nan
Cost after iteration 455: nan
Cost after iteration 460: nan
Cost after iteration 465: nan
Cost after

  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test.drop('Semester', axis=1, inplace=True)


Cost after iteration 0: 36.603492
Cost after iteration 5: 36.581672
Cost after iteration 10: 36.549436
Cost after iteration 15: 36.492906
Cost after iteration 20: 36.390899
Cost after iteration 25: 36.217763
Cost after iteration 30: 35.976065
Cost after iteration 35: 35.822642
Cost after iteration 40: 36.347276
Cost after iteration 45: 38.636347
Cost after iteration 50: 43.297151
Cost after iteration 55: 49.815894
Cost after iteration 60: 57.404688
Cost after iteration 65: 65.472687
Cost after iteration 70: 73.593446
Cost after iteration 75: 81.430565
Cost after iteration 80: 88.642032
Cost after iteration 85: 94.758018
Cost after iteration 90: 99.125735
Cost after iteration 95: 101.117315
Cost after iteration 100: 100.497044
Cost after iteration 105: 97.721184
Cost after iteration 110: 93.687626
Cost after iteration 115: 89.069618
Cost after iteration 120: 84.330140
Cost after iteration 125: 79.658020
Cost after iteration 130: 75.178539
Cost after iteration 135: 71.107868
Cost after i

  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test.drop('Semester', axis=1, inplace=True)


Cost after iteration 5: 42.108394
Cost after iteration 10: 42.072383
Cost after iteration 15: 42.007333
Cost after iteration 20: 41.887179
Cost after iteration 25: 41.682735
Cost after iteration 30: 41.417470
Cost after iteration 35: 41.373120
Cost after iteration 40: 42.457581
Cost after iteration 45: 45.928161
Cost after iteration 50: 51.968221
Cost after iteration 55: 59.724845
Cost after iteration 60: 68.364856
Cost after iteration 65: 77.284273
Cost after iteration 70: 86.049472
Cost after iteration 75: 94.346974
Cost after iteration 80: 101.873030
Cost after iteration 85: 108.179113
Cost after iteration 90: 112.508936
Cost after iteration 95: 113.932177
Cost after iteration 100: 112.133391
Cost after iteration 105: 107.805292
Cost after iteration 110: 102.233592
Cost after iteration 115: 96.310434
Cost after iteration 120: 90.397590
Cost after iteration 125: 84.726026
Cost after iteration 130: 79.452964
Cost after iteration 135: 74.701495
Cost after iteration 140: 70.729219
Cost 

  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test.drop('Semester', axis=1, inplace=True)


Cost after iteration 0: 44.329768
Cost after iteration 5: 44.305727
Cost after iteration 10: 44.269301
Cost after iteration 15: 44.203247
Cost after iteration 20: 44.080823
Cost after iteration 25: 43.871915
Cost after iteration 30: 43.600288
Cost after iteration 35: 43.553807
Cost after iteration 40: 44.653639
Cost after iteration 45: 48.163712
Cost after iteration 50: 54.263015
Cost after iteration 55: 62.080603
Cost after iteration 60: 70.749815
Cost after iteration 65: 79.636407
Cost after iteration 70: 88.312001
Cost after iteration 75: 96.539407
Cost after iteration 80: 104.204782
Cost after iteration 85: 111.214557
Cost after iteration 90: 117.388525
Cost after iteration 95: 122.290611
Cost after iteration 100: 124.923248
Cost after iteration 105: 123.894891
Cost after iteration 110: 119.043443
Cost after iteration 115: 111.850127
Cost after iteration 120: 103.978036
Cost after iteration 125: 96.519127
Cost after iteration 130: 89.723069
Cost after iteration 135: 83.441297
Cost 

  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test.drop('Semester', axis=1, inplace=True)


Cost after iteration 0: 48.099923
Cost after iteration 5: 48.076663
Cost after iteration 10: 48.041595
Cost after iteration 15: 47.978244
Cost after iteration 20: 47.860882
Cost after iteration 25: 47.659730
Cost after iteration 30: 47.394480
Cost after iteration 35: 47.340177
Cost after iteration 40: 48.423051
Cost after iteration 45: 51.992591
Cost after iteration 50: 58.286247
Cost after iteration 55: 66.374116
Cost after iteration 60: 75.304727
Cost after iteration 65: 84.369101
Cost after iteration 70: 93.081242
Cost after iteration 75: 101.165989
Cost after iteration 80: 108.487690
Cost after iteration 85: 114.925238
Cost after iteration 90: 120.210474
Cost after iteration 95: 123.676710
Cost after iteration 100: 124.102298
Cost after iteration 105: 120.862617
Cost after iteration 110: 114.800724
Cost after iteration 115: 107.584415
Cost after iteration 120: 100.569646
Cost after iteration 125: 94.347029
Cost after iteration 130: 88.616597
Cost after iteration 135: 83.105814
Cost

  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test.drop('Semester', axis=1, inplace=True)


Cost after iteration 0: 50.868461
Cost after iteration 5: 50.845632
Cost after iteration 10: 50.811138
Cost after iteration 15: 50.748691
Cost after iteration 20: 50.632671
Cost after iteration 25: 50.433005
Cost after iteration 30: 50.168175
Cost after iteration 35: 50.113025
Cost after iteration 40: 51.209917
Cost after iteration 45: 54.859306
Cost after iteration 50: 61.319337
Cost after iteration 55: 69.608697
Cost after iteration 60: 78.710701
Cost after iteration 65: 87.878797
Cost after iteration 70: 96.630066
Cost after iteration 75: 104.704773
Cost after iteration 80: 111.960679
Cost after iteration 85: 118.271333
Cost after iteration 90: 123.435164
Cost after iteration 95: 126.999714
Cost after iteration 100: 128.000541
Cost after iteration 105: 125.424609
Cost after iteration 110: 119.624708
Cost after iteration 115: 112.090867
Cost after iteration 120: 104.362847
Cost after iteration 125: 97.834149
Cost after iteration 130: 92.192303
Cost after iteration 135: 86.704648
Cost

[[0.0005556  0.00055643 0.00056103 ... 0.000562   0.00055086 0.00054934]]
[[0. 0. 0. ... 0. 0. 0.]]
[[0.00045313 0.00044695 0.00044582 ... 0.00045372 0.00045303 0.0004549 ]]
[[0. 0. 0. ... 0. 0. 0.]]

In [95]:
with open('nn_results_3.json', 'w') as fw:
    json.dump(model_results, fw)

### no fallback icin 

In [None]:
# no fallback icin yazdigim get error fonksiyonu, maskeli dataseti (subset) aliyor ve onlar uzerinde bi tahmin yapiyor
import json
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error

# 1. MASKEYİ YÜKLE
with open('sota_subset_mask.json', 'r') as f:
    valid_mask = json.load(f)

def get_error_score_masked_custom_nn(df_encoded, df_raw, columns, mask):
    error_scores = {}
    
    # Semester indexi 7 varsayımı
    sorted_semesters = sorted(set(df_encoded.iloc[:, 7])) 
    
    print("Neural Network (Custom) Subset Evaluation running...")
    
    for sem_idx in range(1, len(sorted_semesters)):
        training_sem = sorted_semesters[:sem_idx]
        test_sem = sorted_semesters[sem_idx]
        
        # 1. EĞİTİM VERİSİ
        # get_train_data -> X_train (DataFrame), Y_train (1D Array - Integers)
        X_train, Y_train = get_train_data(df_encoded, training_sem, columns)
        
        # --- HATA DÜZELTME: ONE-HOT ENCODING ---
        # Custom NN, Y verisini (Sınıf Sayısı, Örnek Sayısı) formatında bekliyor.
        # Şu an Y_train sadece tamsayılar (0, 1, 2...) içeriyor.
        if Y_train.ndim == 1:
            num_classes = len(le.classes_) # Global 'le' nesnesinden sınıf sayısını al
            # Tamsayıları One-Hot Matrise çevir: (N, 13)
            Y_train_onehot = np.eye(num_classes)[Y_train.astype(int)]
            # Transpoze et ki (13, N) olsun
            Y_train = Y_train_onehot.T
        # ----------------------------------------
        
        # 2. TEST VERİSİ HAZIRLIĞI
        test_rows_mask = df_encoded.iloc[:, 7] == test_sem
        test_df_raw = df_raw[test_rows_mask].copy()
        
        if len(test_df_raw) == 0: continue

        # Model girdisi (Features)
        X_test = df_encoded.loc[test_rows_mask, columns].copy()
        
        # SÜTUN EŞİTLEME (Unseen feature hatasını önlemek için)
        if hasattr(X_train, 'columns'): # X_train DataFrame ise
             X_test = X_test[X_train.columns]
        
        # Gerçek Notlar (RMSE hesabı için Integer olarak kalsın)
        Y_test_real = le.transform(test_df_raw['Letter Grade'])

        # 3. SCALING
        X_train, X_test = standardize(X_train, X_test)
        
        # 4. TRANSPOSE 
        # X de (Features, Samples) olmalı
        X_train = X_train.T 
        # Y_train zaten yukarıda One-Hot ve Transpose yapıldı
        X_test = X_test.T
        
        # 5. EĞİTİM
        # Y_train artık (13, N) boyutunda olduğu için hata vermeyecek
        parameters = neural_network_model(X_train, Y_train, hidden_unit=4, num_iterations=1000)
        
        # 6. TAHMİN
        predictions = prediction(parameters, X_test) 
        # One-hot çıktıdan (13, N) -> Sınıf indeksine (N,) dönüştür
        y_pred_full = np.argmax(predictions, axis=0)
        
        # 7. MASKELEME (FİLTRELEME)
        y_true_filtered = []
        y_pred_filtered = []
        
        test_df_raw = test_df_raw.reset_index(drop=True)
        num_samples = test_df_raw.shape[0]
        
        for i in range(num_samples):
            row = test_df_raw.iloc[i]
            student_id = str(row['Student Number'])
            course_code = row['Course Code']
            
            is_valid = False
            
            if test_sem in mask:
                if student_id in mask[test_sem]:
                    if course_code in mask[test_sem][student_id]:
                        is_valid = True
            
            if is_valid:
                y_true_filtered.append(Y_test_real[i])
                y_pred_filtered.append(y_pred_full[i])
                
        # 8. METRİK HESABI
        if len(y_true_filtered) > 0:
            rmse_test = round(np.sqrt(mean_squared_error(y_true_filtered, y_pred_filtered)), 3)
            mae_test = round(mean_absolute_error(y_true_filtered, y_pred_filtered), 3)
        else:
            rmse_test, mae_test = 0, 0
            
        error_scores.setdefault(sem_idx, {})
        error_scores[sem_idx]['rmse'] = [0, rmse_test]
        error_scores[sem_idx]['mae'] = [0, mae_test]
        
        print(f"Sem {sem_idx}: RMSE {rmse_test}")

    return error_scores

In [None]:
# Sonuclari al ve yazdir 

results_nn_custom = get_error_score_masked_custom_nn(
    df_encoded=df,        # One-Hot yapılmış, kolonları atılmış veri
    df_raw=df_raw,   # ID'lerin durduğu ham veri
    columns=columns, # Modelin kullanacağı kolon listesi
    mask=valid_mask
)


def save_subset_results(results_dict, model_name, filename):
    test_rmse_list = []
    test_mae_list = []
    
    print(f"--- {model_name} Sonuçları İşleniyor ---")
    
    for sem in results_dict:
        # Listelerin 2. elemanı (index 1) TEST hatasıdır
        # [Train_RMSE, Test_RMSE]
        rmse_val = results_dict[sem]['rmse'][1]
        mae_val = results_dict[sem]['mae'][1]
        
        # Eğer dönem boş geçildiyse (0.0) ortalamaya katma
        if rmse_val > 0:
            test_rmse_list.append(rmse_val)
            test_mae_list.append(mae_val)
            print(f"Semester {sem}: RMSE {rmse_val:.4f}")
    
    # Ortalamaları Hesapla
    avg_rmse = np.mean(test_rmse_list)
    avg_mae = np.mean(test_mae_list)
    
    # JSON Hazırla (Numpy tiplerini float'a çeviriyoruz)
    final_output = {
        "Model": model_name,
        "RMSE": float(avg_rmse),
        "MAE": float(avg_mae),
        "Type": "SOTA Subset Evaluation"
    }
    
    # Kaydet
    with open(filename, 'w') as f:
        json.dump(final_output, f, indent=4)
        
    print("-" * 30)
    print(f"✅ KAYDEDİLDİ: {filename}")
    print(f"Ortalama RMSE: {avg_rmse:.4f}")
    print(f"Ortalama MAE : {avg_mae:.4f}")
    print("-" * 30)
# Sonuçları Kaydet:
save_subset_results(results_nn_custom, "Mimis NN (Subset)", "results_nn_subset.json")

Neural Network (Custom) Subset Evaluation running...
Cost after iteration 0: 10.270643
Cost after iteration 5: 10.268684
Cost after iteration 10: 10.266733
Cost after iteration 15: 10.264789
Cost after iteration 20: 10.262851
Cost after iteration 25: 10.260921
Cost after iteration 30: 10.258997
Cost after iteration 35: 10.257080
Cost after iteration 40: 10.255169
Cost after iteration 45: 10.253265
Cost after iteration 50: 10.251367
Cost after iteration 55: 10.249476
Cost after iteration 60: 10.247591
Cost after iteration 65: 10.245711


  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)


Cost after iteration 70: 10.243838
Cost after iteration 75: 10.241970
Cost after iteration 80: 10.240108
Cost after iteration 85: 10.238251
Cost after iteration 90: 10.236400
Cost after iteration 95: 10.234554
Cost after iteration 100: 10.232713
Cost after iteration 105: 10.230876
Cost after iteration 110: 10.229045
Cost after iteration 115: 10.227217
Cost after iteration 120: 10.225394
Cost after iteration 125: 10.223575
Cost after iteration 130: 10.221759
Cost after iteration 135: 10.219948
Cost after iteration 140: 10.218139
Cost after iteration 145: 10.216333
Cost after iteration 150: 10.214530
Cost after iteration 155: 10.212729
Cost after iteration 160: 10.210931
Cost after iteration 165: 10.209134
Cost after iteration 170: 10.207339
Cost after iteration 175: 10.205544
Cost after iteration 180: 10.203750
Cost after iteration 185: 10.201957
Cost after iteration 190: 10.200163
Cost after iteration 195: 10.198369
Cost after iteration 200: 10.196573
Cost after iteration 205: 10.19477

  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)


Cost after iteration 10: 10.863131
Cost after iteration 15: 10.861345
Cost after iteration 20: 10.859566
Cost after iteration 25: 10.857793
Cost after iteration 30: 10.856027
Cost after iteration 35: 10.854266
Cost after iteration 40: 10.852512
Cost after iteration 45: 10.850764
Cost after iteration 50: 10.849022
Cost after iteration 55: 10.847286
Cost after iteration 60: 10.845555
Cost after iteration 65: 10.843830
Cost after iteration 70: 10.842111
Cost after iteration 75: 10.840397
Cost after iteration 80: 10.838689
Cost after iteration 85: 10.836986
Cost after iteration 90: 10.835288
Cost after iteration 95: 10.833595
Cost after iteration 100: 10.831907
Cost after iteration 105: 10.830224
Cost after iteration 110: 10.828546
Cost after iteration 115: 10.826872
Cost after iteration 120: 10.825202
Cost after iteration 125: 10.823536
Cost after iteration 130: 10.821875
Cost after iteration 135: 10.820217
Cost after iteration 140: 10.818563
Cost after iteration 145: 10.816912
Cost after

  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)


Cost after iteration 15: 11.533676
Cost after iteration 20: 11.532306
Cost after iteration 25: 11.530942
Cost after iteration 30: 11.529581
Cost after iteration 35: 11.528226
Cost after iteration 40: 11.526875
Cost after iteration 45: 11.525528
Cost after iteration 50: 11.524186
Cost after iteration 55: 11.522848
Cost after iteration 60: 11.521515
Cost after iteration 65: 11.520185
Cost after iteration 70: 11.518859
Cost after iteration 75: 11.517537
Cost after iteration 80: 11.516219
Cost after iteration 85: 11.514905
Cost after iteration 90: 11.513594
Cost after iteration 95: 11.512286
Cost after iteration 100: 11.510982
Cost after iteration 105: 11.509680
Cost after iteration 110: 11.508382
Cost after iteration 115: 11.507086
Cost after iteration 120: 11.505793
Cost after iteration 125: 11.504502
Cost after iteration 130: 11.503214
Cost after iteration 135: 11.501927
Cost after iteration 140: 11.500643
Cost after iteration 145: 11.499360
Cost after iteration 150: 11.498079
Cost afte

  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)


Cost after iteration 5: 11.968710
Cost after iteration 10: 11.967437
Cost after iteration 15: 11.966169
Cost after iteration 20: 11.964905
Cost after iteration 25: 11.963645
Cost after iteration 30: 11.962390
Cost after iteration 35: 11.961140
Cost after iteration 40: 11.959893
Cost after iteration 45: 11.958651
Cost after iteration 50: 11.957412
Cost after iteration 55: 11.956178
Cost after iteration 60: 11.954947
Cost after iteration 65: 11.953720
Cost after iteration 70: 11.952497
Cost after iteration 75: 11.951277
Cost after iteration 80: 11.950061
Cost after iteration 85: 11.948848
Cost after iteration 90: 11.947638
Cost after iteration 95: 11.946432
Cost after iteration 100: 11.945228
Cost after iteration 105: 11.944027
Cost after iteration 110: 11.942829
Cost after iteration 115: 11.941633
Cost after iteration 120: 11.940440
Cost after iteration 125: 11.939248
Cost after iteration 130: 11.938059
Cost after iteration 135: 11.936872
Cost after iteration 140: 11.935686
Cost after i

  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)


Cost after iteration 0: 12.430550
Cost after iteration 5: 12.429463
Cost after iteration 10: 12.428380
Cost after iteration 15: 12.427301
Cost after iteration 20: 12.426226
Cost after iteration 25: 12.425154
Cost after iteration 30: 12.424086
Cost after iteration 35: 12.423022
Cost after iteration 40: 12.421961
Cost after iteration 45: 12.420904
Cost after iteration 50: 12.419850
Cost after iteration 55: 12.418799
Cost after iteration 60: 12.417751
Cost after iteration 65: 12.416706
Cost after iteration 70: 12.415665
Cost after iteration 75: 12.414626
Cost after iteration 80: 12.413590
Cost after iteration 85: 12.412556
Cost after iteration 90: 12.411525
Cost after iteration 95: 12.410497
Cost after iteration 100: 12.409471
Cost after iteration 105: 12.408447
Cost after iteration 110: 12.407425
Cost after iteration 115: 12.406405
Cost after iteration 120: 12.405387
Cost after iteration 125: 12.404370
Cost after iteration 130: 12.403355
Cost after iteration 135: 12.402341
Cost after ite

  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)


Cost after iteration 0: 12.765749
Cost after iteration 5: 12.764797
Cost after iteration 10: 12.763850
Cost after iteration 15: 12.762905
Cost after iteration 20: 12.761964
Cost after iteration 25: 12.761026
Cost after iteration 30: 12.760092
Cost after iteration 35: 12.759160
Cost after iteration 40: 12.758231
Cost after iteration 45: 12.757305
Cost after iteration 50: 12.756382
Cost after iteration 55: 12.755462
Cost after iteration 60: 12.754544
Cost after iteration 65: 12.753629
Cost after iteration 70: 12.752717
Cost after iteration 75: 12.751806
Cost after iteration 80: 12.750898
Cost after iteration 85: 12.749992
Cost after iteration 90: 12.749088
Cost after iteration 95: 12.748187
Cost after iteration 100: 12.747286
Cost after iteration 105: 12.746388
Cost after iteration 110: 12.745491
Cost after iteration 115: 12.744596
Cost after iteration 120: 12.743702
Cost after iteration 125: 12.742809
Cost after iteration 130: 12.741917
Cost after iteration 135: 12.741026
Cost after ite

  dataFrame = pd.concat([dataFrame, df[df.iloc[:, 7] == sem]], ignore_index=True)


Cost after iteration 0: 13.112792
Cost after iteration 5: 13.111935
Cost after iteration 10: 13.111081
Cost after iteration 15: 13.110229
Cost after iteration 20: 13.109381
Cost after iteration 25: 13.108535
Cost after iteration 30: 13.107692
Cost after iteration 35: 13.106852
Cost after iteration 40: 13.106015
Cost after iteration 45: 13.105180
Cost after iteration 50: 13.104347
Cost after iteration 55: 13.103517
Cost after iteration 60: 13.102690
Cost after iteration 65: 13.101864
Cost after iteration 70: 13.101041
Cost after iteration 75: 13.100220
Cost after iteration 80: 13.099400
Cost after iteration 85: 13.098583
Cost after iteration 90: 13.097767
Cost after iteration 95: 13.096953
Cost after iteration 100: 13.096140
Cost after iteration 105: 13.095329
Cost after iteration 110: 13.094519
Cost after iteration 115: 13.093710
Cost after iteration 120: 13.092903
Cost after iteration 125: 13.092096
Cost after iteration 130: 13.091290
Cost after iteration 135: 13.090484
Cost after ite