In [2]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import recommendations
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, silhouette_samples

In [4]:
df = pd.read_csv('../datasets/SEHIR/processed_course_clustering_dataset.csv')
df = df[['Student Number', 'Course Code', 'Letter Grade', 'Semester', 'Course Credit', 'Course Year'] + list(df.columns[20:])]
df

Unnamed: 0,Student Number,Course Code,Letter Grade,Semester,Course Credit,Course Year,Subject,A+ rate,A rate,A- rate,...,C rate,C- rate,D+ rate,D rate,D- rate,F rate,Mean GPA - Students taken,Mean Grade - Students taken,STDEV GPA - Students taken,STDEV Grade - Students taken
0,240,UNI 111,F,2011 - Fall,3,1,UNI,0.013605,0.115646,0.149660,...,0.054422,0.040816,0.047619,0.020408,0.020408,0.088435,2.467279,2.680952,0.864273,1.193667
1,338,UNI 107,A,2011 - Fall,3,1,UNI,0.000000,0.318182,0.181818,...,0.090909,0.136364,0.045455,0.000000,0.045455,0.000000,2.893182,2.986364,0.821569,1.098612
2,338,UNI 105,A,2011 - Fall,3,1,UNI,0.115385,0.269231,0.038462,...,0.038462,0.000000,0.000000,0.000000,0.000000,0.038462,2.713846,3.211538,0.952080,0.937369
3,338,UNI 103,A,2011 - Fall,3,1,UNI,0.010929,0.153005,0.147541,...,0.081967,0.032787,0.000000,0.021858,0.027322,0.065574,2.483224,2.813115,0.977342,1.117219
4,240,UNI 105,A,2011 - Fall,3,1,UNI,0.115385,0.269231,0.038462,...,0.038462,0.000000,0.000000,0.000000,0.000000,0.038462,2.713846,3.211538,0.952080,0.937369
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48736,1102,MGT 531,A-,2014 - Spring,3,5,MGT,0.000000,0.237805,0.420732,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.416209,3.391758,0.579819,0.793752
48737,1102,MGT 585,A,2014 - Spring,3,5,MGT,0.000000,0.297521,0.314050,...,0.008264,0.000000,0.000000,0.000000,0.000000,0.000000,3.404921,3.440476,0.535670,0.751231
48738,1102,MGT 552,A,2014 - Spring,3,5,MGT,0.000000,0.354331,0.251969,...,0.055118,0.000000,0.000000,0.000000,0.000000,0.031496,3.449615,3.393077,0.454990,0.824827
48739,1984,MGT 574,A-,2014 - Spring,3,5,MGT,0.000000,0.393443,0.327869,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.352769,3.570769,0.467632,0.708811


In [6]:
df = pd.concat([df, pd.get_dummies(df['Subject'], prefix='Subject'), pd.get_dummies(df['Course Year'], prefix='Course Year')], axis=1)
df.drop(['Subject', 'Course Year'], axis=1, inplace=True)

In [8]:
df

Unnamed: 0,Student Number,Course Code,Letter Grade,Semester,Course Credit,A+ rate,A rate,A- rate,B+ rate,B rate,...,Subject_PSY,Subject_SOC,Subject_SPA,Subject_UNI,Course Year_1,Course Year_2,Course Year_3,Course Year_4,Course Year_5,Course Year_6
0,240,UNI 111,F,2011 - Fall,3,0.013605,0.115646,0.149660,0.163265,0.156463,...,False,False,False,True,True,False,False,False,False,False
1,338,UNI 107,A,2011 - Fall,3,0.000000,0.318182,0.181818,0.045455,0.090909,...,False,False,False,True,True,False,False,False,False,False
2,338,UNI 105,A,2011 - Fall,3,0.115385,0.269231,0.038462,0.153846,0.153846,...,False,False,False,True,True,False,False,False,False,False
3,338,UNI 103,A,2011 - Fall,3,0.010929,0.153005,0.147541,0.142077,0.196721,...,False,False,False,True,True,False,False,False,False,False
4,240,UNI 105,A,2011 - Fall,3,0.115385,0.269231,0.038462,0.153846,0.153846,...,False,False,False,True,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48736,1102,MGT 531,A-,2014 - Spring,3,0.000000,0.237805,0.420732,0.207317,0.134146,...,False,False,False,False,False,False,False,False,True,False
48737,1102,MGT 585,A,2014 - Spring,3,0.000000,0.297521,0.314050,0.223140,0.115702,...,False,False,False,False,False,False,False,False,True,False
48738,1102,MGT 552,A,2014 - Spring,3,0.000000,0.354331,0.251969,0.181102,0.070866,...,False,False,False,False,False,False,False,False,True,False
48739,1984,MGT 574,A-,2014 - Spring,3,0.000000,0.393443,0.327869,0.278689,0.000000,...,False,False,False,False,False,False,False,False,True,False


In [10]:
numerical_grades = {'A+': 4.1, 'A': 4.0, 'A-': 3.7, 'B+': 3.3, 'B': 3.0, 'B-': 2.7, 'C+': 2.3, 'C': 2.0,
                    'C-': 1.7, 'D+': 1.3, 'D': 1.0, 'D-': 0.5, 'F': 0.0}

In [12]:
course_credits = {}
for row_idx in df.index:
    course_code = df.iloc[row_idx, 1]
    credit = df.iloc[row_idx, 4]    
    course_credits[course_code] = credit

In [13]:
def get_semester_data(semester_name):
    semester_data = {}   # semester data in shape {student_number: {course_code: letter_grade, ...}, ...}
    
    # extracting the instances with the given semester_name from the main dataFrame
    dataset = df[df.iloc[:, 3] == semester_name]
    dataset.index = range(len(dataset))
    
    # filling the semester_data dictionary
    for row_idx in dataset.index:
        student_number = dataset.iloc[row_idx, 0]
        course_code = dataset.iloc[row_idx, 1]
        letter_grade = dataset.iloc[row_idx, 2]
        
        semester_data.setdefault(student_number, {})
        semester_data[student_number][course_code] = numerical_grades[letter_grade]
    
    return semester_data

In [14]:
def get_avg_gpa(train_semester, student):
    courses = train_semester[student]
    total_credit = 0
    weights = 0
    for course in courses:
        total_credit += course_credits[course]
        weights += courses[course] * course_credits[course]
    
    return weights / total_credit

In [15]:
def get_grade_stats(semester_data, student):
    grade_list = []
    
    for course in semester_data[student]:
        numerical_grade = semester_data[student][course]
        grade_list.append(numerical_grade)
    
    mean = np.mean(grade_list)
    std_dev = np.std(grade_list)
    
    return mean, std_dev

In [20]:
def fit_cluster(train_sems, num_clusters, training_data, cluster_model):
    train_dataset = pd.DataFrame(columns=df.columns)
    
    # extracting instances from the dataset which should be in training data
    for sem in train_sems:
        train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)

    cluster_features = train_dataset[list(train_dataset.columns[4:])]
    
    # fitting a clustering model based on GPA, Completed Credits and Departments
    fitted_cluster_model = cluster_model(n_clusters=num_clusters).fit(cluster_features)
    cluster_labels = fitted_cluster_model.labels_

    # ===== Silhouette metrics =====
    sil_score = silhouette_score(cluster_features, cluster_labels)

    sil_samples = silhouette_samples(cluster_features, cluster_labels)
    cluster_silhouette = {}

    for lbl in np.unique(cluster_labels):
        cluster_silhouette[lbl] = sil_samples[cluster_labels == lbl].mean()
    # ==============================

    
    cluster_dataset = {}   # splitting the train dataset into sub-dicts based on their predicted cluster label
    
    # assigning each students' data to their predicted clusters
    for i in range(len(cluster_labels)):
        cluster_dataset.setdefault(cluster_labels[i], {})
        student_number = train_dataset.iloc[i, 0]
        cluster_dataset[cluster_labels[i]][student_number] = training_data[student_number]
    
    return cluster_dataset, fitted_cluster_model, sil_score, cluster_silhouette

In [22]:
def cluster_test_data(cluster_model, semester_name):
    # extracting all instances with the given semester_name from the main dataFrame
    test_dataset = df[df.iloc[:, 3] == semester_name]
    test_dataset.index = range(len(test_dataset))
    
    # predicting the cluster labels of test data using a cluster model fitted on the train data so far
    cluster_features = test_dataset[list(test_dataset.columns[4:])]
    cluster_labels = cluster_model.predict(cluster_features)
    
    # getting the semester data of available students in test semester
    semester_data = get_semester_data(semester_name)
    
    cluster_dataset = {}   # splitting the test dataset into sub-dicts based on their predicted cluster label
    
    # assigning each students' data to their predicted clusters
    for i in range(len(cluster_labels)):
        cluster_dataset.setdefault(cluster_labels[i], {})
        student_number = test_dataset.iloc[i, 0]
        cluster_dataset[cluster_labels[i]][student_number] = semester_data[student_number]
        
    return cluster_dataset    

In [24]:
# yeni eklendi 
def get_course_stats(train_semester):
    # Eğitim verisindeki her dersin ortalamasını hesaplar
    course_grades = {}
    all_grades = []
    
    for student, courses in train_semester.items():
        for course, grade in courses.items():
            course_grades.setdefault(course, []).append(grade)
            all_grades.append(grade)
            
    course_means = {c: np.mean(g) for c, g in course_grades.items()}
    global_mean = np.mean(all_grades) if all_grades else 0
    
    return course_means, global_mean

In [26]:
# yeni hali 
def get_errors(train_semester, test_semester, sim, item_based, fallback_strategy='course'): # Parametre eklendi
    average_gpa = {}
    y_true = []
    y_pred = []
    gpa = {}
    
    # Yeni strateji için hazırlık
    if fallback_strategy == 'course':
        course_means, global_mean = get_course_stats(train_semester)

    for student in train_semester:
        gpa[student] = get_avg_gpa(train_semester, student)
    
    # item_sims hesabı (Senin kodda yorumdaydı, item_based ise açılması lazım)
    if item_based:
        item_sims = recommendations.calculateSimilarItems(train_semester)

    for student in train_semester:
        recommended_courses = {}
        
        if item_based:
            recs = recommendations.getRecommendedItems(train_semester, item_sims, student)
        else:
            recs = recommendations.getRecommendations(train_semester, student, sim, dgpa=True, gpa=gpa, delta=0.7)
        
        for rec_grade, rec_course in recs:
            recommended_courses.setdefault(rec_course, rec_grade)
            
        average_gpa.setdefault(student, get_avg_gpa(train_semester, student))
        
        if student not in test_semester:
            continue
        
        # Öğrencinin kendi istatistikleri (Outlier kontrolü için)
        mean, std_dev = get_grade_stats(train_semester, student)
            
        for course_code in test_semester[student]:
            # 1. Tahmin Belirleme
            if course_code in recommended_courses:
                # CF Tahmini Var
                rec_grade = recommended_courses[course_code]
            else:
                # CF Tahmini Yok -> FALLBACK STRATEJİSİ
                if fallback_strategy == 'course':
                    # Yeni Yöntem: Dersin Ortalaması
                    rec_grade = course_means.get(course_code, global_mean)
                else:
                    # Eski Yöntem: Öğrencinin Ortalaması
                    rec_grade = average_gpa[student]

            # 2. Outlier (Aykırı Değer) Kontrolü (Senin kodundaki mantık)
            # Tahmin edilen not (veya fallback), öğrencinin normal aralığının dışındaysa dahil etme
            if rec_grade < mean - (2 * std_dev) or rec_grade > mean + (2 * std_dev):
                continue
            
            # 3. Listelere Ekle
            y_pred.append(rec_grade)
            y_true.append(test_semester[student][course_code])
            
    assert len(y_true) == len(y_pred)
    return y_true, y_pred

In [28]:
# yeni hali 
def predict(sim, cluster_model, item_based=False, fallback_strategy='course'): # <--- 1. Parametre eklendi (Varsayılan: 'course')
    predictions = {} 
    
    sorted_semesters = sorted(set(df.iloc[:, 3]))   # sorting semesters in a time series manner
    
    for num_clusters in range(15, 20, 5):
        print(f"Running for k={num_clusters}...")
        predictions.setdefault(str(num_clusters), {})
        train_semester = {}   # {student_number: {course_code: letter_grade, ...}, ...}
        
        for sem_idx in range(1, len(sorted_semesters)):
            predictions[str(num_clusters)].setdefault(str(sem_idx), {'y_true': [], 'y_pred': []})
            
            # combining all previous semesters in dataset and consider it as training semester
            new_semester = get_semester_data(sorted_semesters[sem_idx-1])
            for student in new_semester:
                if student in train_semester:   # combine data if a student already exist
                    train_semester[student].update(new_semester[student])
                else:   # create a new key-value pair for students with no record
                    train_semester[student] = new_semester[student]
            
            training_semesters_name = sorted_semesters[:sem_idx]   # names of all training semesters
            
            # getting the cluster model fitted on training data and each clusters' training data
            train_cluster_data, fitted_cluster_model, sil_score, cluster_silhouette = fit_cluster(training_semesters_name, num_clusters, train_semester, cluster_model)

            print(
                f"k={num_clusters}, "
                f"semester={sem_idx}, "
                f"silhouette={sil_score:.3f}"
            )
            
            # getting the clustered test data
            test_semester_name = sorted_semesters[sem_idx]
            test_cluster_data = cluster_test_data(fitted_cluster_model, test_semester_name)
            
            # fitting each cluster label with a similarity metric, and measure the error between the same
            # cluster labels in training and test data
            for cluster_label in train_cluster_data:
                if cluster_label not in test_cluster_data:
                    continue
                
                # <--- 2. BURADA: Parametre get_errors'a iletiliyor
                y_true, y_pred = get_errors(train_cluster_data[cluster_label], 
                                            test_cluster_data[cluster_label], 
                                            sim, 
                                            item_based, 
                                            fallback_strategy)
                                            
                predictions[str(num_clusters)][str(sem_idx)]['y_true'] += y_true
                predictions[str(num_clusters)][str(sem_idx)]['y_pred'] += y_pred
            
    return predictions

In [30]:
train_dataset = pd.DataFrame(columns=df.columns)
sorted_semesters = sorted(set(df.iloc[:, 3]))
for sem in sorted_semesters:
        train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)
print(list(train_dataset.columns[4:]))

['Course Credit', 'A+ rate', 'A rate', 'A- rate', 'B+ rate', 'B rate', 'B- rate', 'C+ rate', 'C rate', 'C- rate', 'D+ rate', 'D rate', 'D- rate', 'F rate', 'Mean GPA - Students taken', 'Mean Grade - Students taken', 'STDEV GPA - Students taken', 'STDEV Grade - Students taken', 'Subject_ARAB', 'Subject_ARM', 'Subject_BGM', 'Subject_CS', 'Subject_CTV', 'Subject_CULT', 'Subject_ECE', 'Subject_ECON', 'Subject_EE', 'Subject_EECS', 'Subject_ENGR', 'Subject_FRE', 'Subject_GER', 'Subject_HIST', 'Subject_HUK', 'Subject_IE', 'Subject_ISE', 'Subject_ISS', 'Subject_ITM', 'Subject_LAW', 'Subject_LIFE', 'Subject_LING', 'Subject_LIT', 'Subject_MATH', 'Subject_MGT', 'Subject_MTS', 'Subject_PERS', 'Subject_PHIL', 'Subject_PHYS', 'Subject_POLS', 'Subject_PSY', 'Subject_SOC', 'Subject_SPA', 'Subject_UNI', 'Course Year_1', 'Course Year_2', 'Course Year_3', 'Course Year_4', 'Course Year_5', 'Course Year_6']


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


In [32]:
scar = train_dataset[train_dataset.columns[4:]]
scar

Unnamed: 0,Course Credit,A+ rate,A rate,A- rate,B+ rate,B rate,B- rate,C+ rate,C rate,C- rate,...,Subject_PSY,Subject_SOC,Subject_SPA,Subject_UNI,Course Year_1,Course Year_2,Course Year_3,Course Year_4,Course Year_5,Course Year_6
0,3,0.013605,0.115646,0.149660,0.163265,0.156463,0.081633,0.047619,0.054422,0.040816,...,False,False,False,True,True,False,False,False,False,False
1,3,0.000000,0.318182,0.181818,0.045455,0.090909,0.045455,0.000000,0.090909,0.136364,...,False,False,False,True,True,False,False,False,False,False
2,3,0.115385,0.269231,0.038462,0.153846,0.153846,0.076923,0.115385,0.038462,0.000000,...,False,False,False,True,True,False,False,False,False,False
3,3,0.010929,0.153005,0.147541,0.142077,0.196721,0.049180,0.071038,0.081967,0.032787,...,False,False,False,True,True,False,False,False,False,False
4,3,0.115385,0.269231,0.038462,0.153846,0.153846,0.076923,0.115385,0.038462,0.000000,...,False,False,False,True,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48736,3,0.000000,0.237805,0.420732,0.207317,0.134146,0.000000,0.000000,0.000000,0.000000,...,False,False,False,False,False,False,False,False,True,False
48737,3,0.000000,0.297521,0.314050,0.223140,0.115702,0.041322,0.000000,0.008264,0.000000,...,False,False,False,False,False,False,False,False,True,False
48738,3,0.000000,0.354331,0.251969,0.181102,0.070866,0.055118,0.000000,0.055118,0.000000,...,False,False,False,False,False,False,False,False,True,False
48739,3,0.000000,0.393443,0.327869,0.278689,0.000000,0.000000,0.000000,0.000000,0.000000,...,False,False,False,False,False,False,False,False,True,False


In [51]:
print(f"Scarsity = {scar.eq(False).sum().sum() / (48741 * 58)}%")

Scarsity = 0.7081480648239923%


In [None]:
# hangi fallback daha iyi diye baktik, student fallback (eski yontem) daha iyi cikti 
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Yardımcı Fonksiyon: Karmaşık sözlükten ortalama hatayı hesaplar
def calculate_global_metrics(predictions_dict):
    all_rmse = []
    all_mae = []
    
    # Her Cluster Sayısı (k=10, 15...) için dön
    for k in predictions_dict:
        # Her Dönem (Semester 1, 2...) için dön
        for sem in predictions_dict[k]:
            y_true = predictions_dict[k][sem]['y_true']
            y_pred = predictions_dict[k][sem]['y_pred']
            
            if len(y_true) > 0:
                # O dönem ve k için hatayı hesapla
                rmse = np.sqrt(mean_squared_error(y_true, y_pred))
                mae = mean_absolute_error(y_true, y_pred)
                
                all_rmse.append(rmse)
                all_mae.append(mae)
    
    # Tüm senaryoların ortalamasını al
    return np.mean(all_rmse), np.mean(all_mae)

# --- DENEY BAŞLIYOR ---

# 1. Eski Yöntem (Student Fallback)
print("Running Strategy: Student GPA Fallback (This may take a while)...")
# Fonksiyon parametrelerini kendi notebook'undaki ayarlara göre (sim_pearson vb.) düzenleyebilirsin
preds_student = predict(recommendations.sim_pearson, KMeans, item_based=True, fallback_strategy='student')
rmse_student, mae_student = calculate_global_metrics(preds_student)

# 2. Yeni Yöntem (Course Fallback)
print("Running Strategy: Course Average Fallback...")
preds_course = predict(recommendations.sim_pearson, KMeans, item_based=True, fallback_strategy='course')
rmse_course, mae_course = calculate_global_metrics(preds_course)

# 3. Tabloyu Yazdır
print("\nSensitivity Analysis (Clustering Model):")
print("-" * 60)
print(f"{'Fallback Strategy':<25} | {'Mean RMSE':<10} | {'Mean MAE':<10}")
print("-" * 60)
print(f"{'Student GPA (Old)':<25} | {rmse_student:.4f}     | {mae_student:.4f}")
print(f"{'Course Average (New)':<25} | {rmse_course:.4f}     | {mae_course:.4f}")
print("-" * 60)

# İyileşme Oranı
improvement = ((rmse_student - rmse_course) / rmse_student) * 100
print(f"Improvement in RMSE: {improvement:.2f}%")

Running Strategy: Student GPA Fallback (This may take a while)...
Running for k=10...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)
  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 105
100 / 104
100 / 105
100 / 108
100 / 104
100 / 111


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 171
100 / 163
100 / 156
100 / 100
100 / 208
200 / 208
100 / 163
100 / 134
100 / 161
100 / 166


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 246
200 / 246
100 / 226
200 / 226
100 / 316
200 / 316
300 / 316
100 / 102
100 / 324
200 / 324
300 / 324
100 / 232
200 / 232
100 / 104
100 / 181
100 / 187
100 / 221
200 / 221


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 310
200 / 310
300 / 310
100 / 286
200 / 286
100 / 420
200 / 420
300 / 420
400 / 420
100 / 389
200 / 389
300 / 389
100 / 294
200 / 294
100 / 133
100 / 271
200 / 271
100 / 308
200 / 308
300 / 308
100 / 255
200 / 255
100 / 305
200 / 305
300 / 305


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 377
200 / 377
300 / 377
100 / 538
200 / 538
300 / 538
400 / 538
500 / 538
100 / 451
200 / 451
300 / 451
400 / 451
100 / 351
200 / 351
300 / 351
100 / 365
200 / 365
300 / 365
100 / 383
200 / 383
300 / 383
100 / 175
100 / 362
200 / 362
300 / 362
100 / 375
200 / 375
300 / 375
100 / 342
200 / 342
300 / 342


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 446
200 / 446
300 / 446
400 / 446
100 / 506
200 / 506
300 / 506
400 / 506
500 / 506
100 / 464
200 / 464
300 / 464
400 / 464
100 / 499
200 / 499
300 / 499
400 / 499
100 / 417
200 / 417
300 / 417
400 / 417
100 / 374
200 / 374
300 / 374
100 / 443
200 / 443
300 / 443
400 / 443
100 / 512
200 / 512
300 / 512
400 / 512
500 / 512
100 / 371
200 / 371
300 / 371
100 / 457
200 / 457
300 / 457
400 / 457
Running for k=15...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)
  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 105
100 / 105
100 / 102


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 171
100 / 163
100 / 160
100 / 112
100 / 138
100 / 163
100 / 182
100 / 136
100 / 105
100 / 156


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 235
200 / 235
100 / 226
200 / 226
100 / 194
100 / 219
200 / 219
100 / 102
100 / 179
100 / 231
200 / 231
100 / 104
100 / 233
200 / 233
100 / 220
200 / 220
100 / 142
100 / 187
100 / 239
200 / 239


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 310
200 / 310
300 / 310
100 / 286
200 / 286
100 / 400
200 / 400
300 / 400
400 / 400
100 / 319
200 / 319
300 / 319
100 / 192
100 / 245
200 / 245
100 / 293
200 / 293
100 / 214
200 / 214
100 / 283
200 / 283
100 / 256
200 / 256
100 / 246
200 / 246
100 / 295
200 / 295
100 / 288
200 / 288
100 / 156
100 / 114


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 377
200 / 377
300 / 377
100 / 354
200 / 354
300 / 354
100 / 386
200 / 386
300 / 386
100 / 433
200 / 433
300 / 433
400 / 433
100 / 140
100 / 409
200 / 409
300 / 409
400 / 409
100 / 306
200 / 306
300 / 306
100 / 366
200 / 366
300 / 366
100 / 290
200 / 290
100 / 314
200 / 314
300 / 314
100 / 355
200 / 355
300 / 355
100 / 360
200 / 360
300 / 360
100 / 207
200 / 207
100 / 147
100 / 154


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 462
200 / 462
300 / 462
400 / 462
100 / 423
200 / 423
300 / 423
400 / 423
100 / 460
200 / 460
300 / 460
400 / 460
100 / 470
200 / 470
300 / 470
400 / 470
100 / 379
200 / 379
300 / 379
100 / 436
200 / 436
300 / 436
400 / 436
100 / 299
200 / 299
100 / 485
200 / 485
300 / 485
400 / 485
100 / 375
200 / 375
300 / 375
100 / 428
200 / 428
300 / 428
400 / 428
100 / 467
200 / 467
300 / 467
400 / 467
100 / 475
200 / 475
300 / 475
400 / 475
100 / 425
200 / 425
300 / 425
400 / 425
100 / 285
200 / 285
100 / 181
Running for k=20...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)
  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 105
100 / 104
100 / 105


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 163
100 / 170
100 / 163
100 / 154
100 / 130
100 / 161
100 / 153
100 / 130
100 / 170
100 / 139
100 / 126
100 / 156


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 226
200 / 226
100 / 244
200 / 244
100 / 106
100 / 119
100 / 117
100 / 246
200 / 246
100 / 163
100 / 198
100 / 231
200 / 231
100 / 104
100 / 217
200 / 217
100 / 175
100 / 122
100 / 137
100 / 134
100 / 221
200 / 221
100 / 119


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 300
200 / 300
300 / 300
100 / 286
200 / 286
100 / 278
200 / 278
100 / 117
100 / 373
200 / 373
300 / 373
100 / 283
200 / 283
100 / 309
200 / 309
300 / 309
100 / 245
200 / 245
100 / 252
200 / 252
100 / 141
100 / 280
200 / 280
100 / 255
200 / 255
100 / 293
200 / 293
100 / 307
200 / 307
300 / 307
100 / 177
100 / 295
200 / 295
100 / 303
200 / 303
300 / 303
100 / 156
100 / 119


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 377
200 / 377
300 / 377
100 / 354
200 / 354
300 / 354
100 / 143
100 / 294
200 / 294
100 / 153
100 / 218
200 / 218
100 / 487
200 / 487
300 / 487
400 / 487
100 / 412
200 / 412
300 / 412
400 / 412
100 / 371
200 / 371
300 / 371
100 / 306
200 / 306
300 / 306
100 / 351
200 / 351
300 / 351
100 / 183
100 / 346
200 / 346
300 / 346
100 / 316
200 / 316
300 / 316
100 / 280
200 / 280
100 / 354
200 / 354
300 / 354
100 / 226
200 / 226
100 / 361
200 / 361
300 / 361
100 / 237
200 / 237
100 / 162


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 423
200 / 423
300 / 423
400 / 423
100 / 473
200 / 473
300 / 473
400 / 473
100 / 484
200 / 484
300 / 484
400 / 484
100 / 476
200 / 476
300 / 476
400 / 476
100 / 419
200 / 419
300 / 419
400 / 419
100 / 434
200 / 434
300 / 434
400 / 434
100 / 397
200 / 397
300 / 397
100 / 252
200 / 252
100 / 426
200 / 426
300 / 426
400 / 426
100 / 371
200 / 371
300 / 371
100 / 406
200 / 406
300 / 406
400 / 406
100 / 428
200 / 428
300 / 428
400 / 428
100 / 221
200 / 221
100 / 430
200 / 430
300 / 430
400 / 430
100 / 296
200 / 296
100 / 461
200 / 461
300 / 461
400 / 461
100 / 425
200 / 425
300 / 425
400 / 425
100 / 284
200 / 284
100 / 108
100 / 207
200 / 207
Running for k=25...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)
  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 105
100 / 104
100 / 105
100 / 104


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 163
100 / 178
100 / 163
100 / 225
200 / 225
100 / 130
100 / 151
100 / 153
100 / 130
100 / 153
100 / 146


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 245
200 / 245
100 / 226
200 / 226
100 / 102
100 / 221
200 / 221
100 / 163
100 / 179
100 / 221
200 / 221
100 / 104
100 / 175
100 / 231
200 / 231
100 / 223
200 / 223
100 / 111
100 / 130
100 / 100
100 / 221
200 / 221
100 / 162
100 / 102
100 / 119


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 300
200 / 300
300 / 300
100 / 286
200 / 286
100 / 105
100 / 216
200 / 216
100 / 170
100 / 225
200 / 225
100 / 285
200 / 285
100 / 304
200 / 304
300 / 304
100 / 225
200 / 225
100 / 245
200 / 245
100 / 292
200 / 292
100 / 234
200 / 234
100 / 174
100 / 254
200 / 254
100 / 125
100 / 270
200 / 270
100 / 195
100 / 277
200 / 277
100 / 210
200 / 210
100 / 291
200 / 291
100 / 288
200 / 288
100 / 156
100 / 114


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 354
200 / 354
300 / 354
100 / 394
200 / 394
300 / 394
100 / 143
100 / 352
200 / 352
300 / 352
100 / 115
100 / 391
200 / 391
300 / 391
100 / 320
200 / 320
300 / 320
100 / 315
200 / 315
300 / 315
100 / 163
100 / 343
200 / 343
300 / 343
100 / 313
200 / 313
300 / 313
100 / 342
200 / 342
300 / 342
100 / 328
200 / 328
300 / 328
100 / 354
200 / 354
300 / 354
100 / 356
200 / 356
300 / 356
100 / 185
100 / 265
200 / 265
100 / 342
200 / 342
300 / 342
100 / 354
200 / 354
300 / 354
100 / 354
200 / 354
300 / 354
100 / 147
100 / 162


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 446
200 / 446
300 / 446
400 / 446
100 / 423
200 / 423
300 / 423
400 / 423
100 / 403
200 / 403
300 / 403
400 / 403
100 / 110
100 / 446
200 / 446
300 / 446
400 / 446
100 / 457
200 / 457
300 / 457
400 / 457
100 / 374
200 / 374
300 / 374
100 / 374
200 / 374
300 / 374
100 / 252
200 / 252
100 / 432
200 / 432
300 / 432
400 / 432
100 / 422
200 / 422
300 / 422
400 / 422
100 / 375
200 / 375
300 / 375
100 / 325
200 / 325
300 / 325
100 / 377
200 / 377
300 / 377
100 / 428
200 / 428
300 / 428
400 / 428
100 / 353
200 / 353
300 / 353
100 / 417
200 / 417
300 / 417
400 / 417
100 / 459
200 / 459
300 / 459
400 / 459
100 / 424
200 / 424
300 / 424
400 / 424
100 / 284
200 / 284
100 / 181
100 / 384
200 / 384
300 / 384
100 / 207
200 / 207
Running for k=30...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)
  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 105
100 / 104
100 / 105


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 166
100 / 163
100 / 163
100 / 115
100 / 147
100 / 138
100 / 122
100 / 130
100 / 156
100 / 143
100 / 136
100 / 151
100 / 145


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 226
200 / 226
100 / 234
200 / 234
100 / 142
100 / 225
200 / 225
100 / 237
200 / 237
100 / 163
100 / 198
100 / 205
200 / 205
100 / 104
100 / 118
100 / 175
100 / 200
200 / 200
100 / 153
100 / 108
100 / 219
200 / 219
100 / 111
100 / 212
200 / 212
100 / 147
100 / 216
200 / 216
100 / 102


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 303
200 / 303
300 / 303
100 / 286
200 / 286
100 / 105
100 / 182
100 / 174
100 / 225
200 / 225
100 / 151
100 / 245
200 / 245
100 / 252
200 / 252
100 / 141
100 / 174
100 / 254
200 / 254
100 / 292
200 / 292
100 / 125
100 / 276
200 / 276
100 / 219
200 / 219
100 / 149
100 / 175
100 / 209
200 / 209
100 / 263
200 / 263
100 / 297
200 / 297
100 / 283
200 / 283
100 / 114
100 / 303
200 / 303
300 / 303
100 / 119


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 354
200 / 354
300 / 354
100 / 377
200 / 377
300 / 377
100 / 143
100 / 411
200 / 411
300 / 411
400 / 411
100 / 115
100 / 344
200 / 344
300 / 344
100 / 345
200 / 345
300 / 345
100 / 368
200 / 368
300 / 368
100 / 305
200 / 305
300 / 305
100 / 315
200 / 315
300 / 315
100 / 183
100 / 316
200 / 316
300 / 316
100 / 313
200 / 313
300 / 313
100 / 236
200 / 236
100 / 160
100 / 225
200 / 225
100 / 262
200 / 262
100 / 248
200 / 248
100 / 363
200 / 363
300 / 363
100 / 253
200 / 253
100 / 185
100 / 351
200 / 351
300 / 351
100 / 226
200 / 226
100 / 354
200 / 354
300 / 354
100 / 353
200 / 353
300 / 353
100 / 344
200 / 344
300 / 344
100 / 237
200 / 237
100 / 162


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 446
200 / 446
300 / 446
400 / 446
100 / 423
200 / 423
300 / 423
400 / 423
100 / 392
200 / 392
300 / 392
100 / 327
200 / 327
300 / 327
100 / 168
100 / 414
200 / 414
300 / 414
400 / 414
100 / 457
200 / 457
300 / 457
400 / 457
100 / 374
200 / 374
300 / 374
100 / 374
200 / 374
300 / 374
100 / 240
200 / 240
100 / 335
200 / 335
300 / 335
100 / 375
200 / 375
300 / 375
100 / 286
200 / 286
100 / 201
200 / 201
100 / 409
200 / 409
300 / 409
400 / 409
100 / 321
200 / 321
300 / 321
100 / 443
200 / 443
300 / 443
400 / 443
100 / 313
200 / 313
300 / 313
100 / 221
200 / 221
100 / 368
200 / 368
300 / 368
100 / 454
200 / 454
300 / 454
400 / 454
100 / 425
200 / 425
300 / 425
400 / 425
100 / 293
200 / 293
100 / 284
200 / 284
100 / 319
200 / 319
300 / 319
100 / 181
100 / 385
200 / 385
300 / 385
100 / 152
100 / 108
100 / 207
200 / 207
Running Strategy: Course Average Fallback...
Running for k=10...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)
  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 105
100 / 105
100 / 104
100 / 113


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 171
100 / 163
100 / 134
100 / 160
100 / 146
100 / 173
100 / 107
100 / 181
100 / 156


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 235
200 / 235
100 / 226
200 / 226
100 / 136
100 / 102
100 / 322
200 / 322
300 / 322
100 / 232
200 / 232
100 / 202
200 / 202
100 / 223
200 / 223
100 / 259
200 / 259
100 / 102


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 303
200 / 303
300 / 303
100 / 286
200 / 286
100 / 303
200 / 303
300 / 303
100 / 401
200 / 401
300 / 401
400 / 401
100 / 286
200 / 286
100 / 293
200 / 293
100 / 290
200 / 290
100 / 311
200 / 311
300 / 311
100 / 333
200 / 333
300 / 333


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 377
200 / 377
300 / 377
100 / 443
200 / 443
300 / 443
400 / 443
100 / 384
200 / 384
300 / 384
100 / 416
200 / 416
300 / 416
400 / 416
100 / 366
200 / 366
300 / 366
100 / 352
200 / 352
300 / 352
100 / 358
200 / 358
300 / 358
100 / 360
200 / 360
300 / 360
100 / 170
100 / 147


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 482
200 / 482
300 / 482
400 / 482
100 / 423
200 / 423
300 / 423
400 / 423
100 / 618
200 / 618
300 / 618
400 / 618
500 / 618
600 / 618
100 / 316
200 / 316
300 / 316
100 / 425
200 / 425
300 / 425
400 / 425
100 / 426
200 / 426
300 / 426
400 / 426
100 / 448
200 / 448
300 / 448
400 / 448
100 / 273
200 / 273
100 / 489
200 / 489
300 / 489
400 / 489
100 / 472
200 / 472
300 / 472
400 / 472
Running for k=15...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)
  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 105
100 / 105
100 / 101


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 171
100 / 163
100 / 133
100 / 163
100 / 223
200 / 223
100 / 147
100 / 160
100 / 134
100 / 171
100 / 130
100 / 141
100 / 146


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 246
200 / 246
100 / 226
200 / 226
100 / 349
200 / 349
300 / 349
100 / 211
200 / 211
100 / 109
100 / 232
200 / 232
100 / 109
100 / 229
200 / 229
100 / 207
200 / 207
100 / 224
200 / 224
100 / 178
100 / 102


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 296
200 / 296
100 / 271
200 / 271
100 / 322
200 / 322
300 / 322
100 / 338
200 / 338
300 / 338
100 / 293
200 / 293
100 / 285
200 / 285
100 / 133
100 / 255
200 / 255
100 / 156
100 / 289
200 / 289
100 / 297
200 / 297
100 / 197
100 / 283
200 / 283
100 / 114
100 / 119


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 377
200 / 377
300 / 377
100 / 474
200 / 474
300 / 474
400 / 474
100 / 369
200 / 369
300 / 369
100 / 402
200 / 402
300 / 402
400 / 402
100 / 327
200 / 327
300 / 327
100 / 365
200 / 365
300 / 365
100 / 183
100 / 395
200 / 395
300 / 395
100 / 314
200 / 314
300 / 314
100 / 364
200 / 364
300 / 364
100 / 185
100 / 375
200 / 375
300 / 375
100 / 265
200 / 265
100 / 147
100 / 162


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 446
200 / 446
300 / 446
400 / 446
100 / 423
200 / 423
300 / 423
400 / 423
100 / 436
200 / 436
300 / 436
400 / 436
100 / 469
200 / 469
300 / 469
400 / 469
100 / 185
100 / 411
200 / 411
300 / 411
400 / 411
100 / 433
200 / 433
300 / 433
400 / 433
100 / 415
200 / 415
300 / 415
400 / 415
100 / 267
200 / 267
100 / 413
200 / 413
300 / 413
400 / 413
100 / 492
200 / 492
300 / 492
400 / 492
100 / 428
200 / 428
300 / 428
400 / 428
100 / 437
200 / 437
300 / 437
400 / 437
100 / 278
200 / 278
100 / 181
Running for k=20...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)
  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 105
100 / 104
100 / 105
100 / 104
100 / 105
100 / 104
100 / 101


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 171
100 / 163
100 / 111
100 / 154
100 / 163
100 / 138
100 / 160
100 / 146
100 / 150
100 / 130
100 / 171
100 / 166


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 234
200 / 234
100 / 226
200 / 226
100 / 194
100 / 127
100 / 179
100 / 231
200 / 231
100 / 104
100 / 234
200 / 234
100 / 171
100 / 140
100 / 111
100 / 259
200 / 259
100 / 130
100 / 102
100 / 119
100 / 217
200 / 217


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 303
200 / 303
300 / 303
100 / 286
200 / 286
100 / 272
200 / 272
100 / 258
200 / 258
100 / 326
200 / 326
300 / 326
100 / 266
200 / 266
100 / 245
200 / 245
100 / 279
200 / 279
100 / 141
100 / 174
100 / 271
200 / 271
100 / 269
200 / 269
100 / 290
200 / 290
100 / 210
200 / 210
100 / 198
100 / 297
200 / 297
100 / 283
200 / 283
100 / 156
100 / 119


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 377
200 / 377
300 / 377
100 / 354
200 / 354
300 / 354
100 / 349
200 / 349
300 / 349
100 / 348
200 / 348
300 / 348
100 / 115
100 / 379
200 / 379
300 / 379
100 / 368
200 / 368
300 / 368
100 / 305
200 / 305
300 / 305
100 / 365
200 / 365
300 / 365
100 / 163
100 / 356
200 / 356
300 / 356
100 / 313
200 / 313
300 / 313
100 / 185
100 / 267
200 / 267
100 / 265
200 / 265
100 / 342
200 / 342
300 / 342
100 / 354
200 / 354
300 / 354
100 / 355
200 / 355
300 / 355
100 / 147
100 / 162


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 446
200 / 446
300 / 446
400 / 446
100 / 423
200 / 423
300 / 423
400 / 423
100 / 450
200 / 450
300 / 450
400 / 450
100 / 376
200 / 376
300 / 376
100 / 527
200 / 527
300 / 527
400 / 527
500 / 527
100 / 457
200 / 457
300 / 457
400 / 457
100 / 382
200 / 382
300 / 382
100 / 374
200 / 374
300 / 374
100 / 252
200 / 252
100 / 428
200 / 428
300 / 428
400 / 428
100 / 371
200 / 371
300 / 371
100 / 307
200 / 307
300 / 307
100 / 428
200 / 428
300 / 428
400 / 428
100 / 449
200 / 449
300 / 449
400 / 449
100 / 447
200 / 447
300 / 447
400 / 447
100 / 382
200 / 382
300 / 382
100 / 459
200 / 459
300 / 459
400 / 459
100 / 425
200 / 425
300 / 425
400 / 425
100 / 323
200 / 323
300 / 323
100 / 191
Running for k=25...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)
  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 105
100 / 105
100 / 104
100 / 100


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 163
100 / 156
100 / 178
100 / 163
100 / 142
100 / 147
100 / 159
100 / 168
100 / 130
100 / 145
100 / 161
100 / 146


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 234
200 / 234
100 / 226
200 / 226
100 / 309
200 / 309
300 / 309
100 / 179
100 / 231
200 / 231
100 / 104
100 / 212
200 / 212
100 / 175
100 / 214
200 / 214
100 / 157
100 / 111
100 / 130
100 / 100
100 / 252
200 / 252
100 / 102
100 / 119
100 / 105


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 286
200 / 286
100 / 311
200 / 311
300 / 311
100 / 105
100 / 227
200 / 227
100 / 211
200 / 211
100 / 225
200 / 225
100 / 245
200 / 245
100 / 254
200 / 254
100 / 141
100 / 174
100 / 254
200 / 254
100 / 293
200 / 293
100 / 280
200 / 280
100 / 203
200 / 203
100 / 210
200 / 210
100 / 149
100 / 209
200 / 209
100 / 295
200 / 295
100 / 288
200 / 288
100 / 114
100 / 119


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 377
200 / 377
300 / 377
100 / 354
200 / 354
300 / 354
100 / 346
200 / 346
300 / 346
100 / 178
100 / 115
100 / 278
200 / 278
100 / 345
200 / 345
300 / 345
100 / 368
200 / 368
300 / 368
100 / 293
200 / 293
100 / 305
200 / 305
300 / 305
100 / 336
200 / 336
300 / 336
100 / 163
100 / 337
200 / 337
300 / 337
100 / 313
200 / 313
300 / 313
100 / 360
200 / 360
300 / 360
100 / 269
200 / 269
100 / 253
200 / 253
100 / 267
200 / 267
100 / 265
200 / 265
100 / 342
200 / 342
300 / 342
100 / 353
200 / 353
300 / 353
100 / 356
200 / 356
300 / 356
100 / 119
100 / 147
100 / 162


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 446
200 / 446
300 / 446
400 / 446
100 / 423
200 / 423
300 / 423
400 / 423
100 / 391
200 / 391
300 / 391
100 / 466
200 / 466
300 / 466
400 / 466
100 / 121
100 / 457
200 / 457
300 / 457
400 / 457
100 / 374
200 / 374
300 / 374
100 / 374
200 / 374
300 / 374
100 / 240
200 / 240
100 / 429
200 / 429
300 / 429
400 / 429
100 / 371
200 / 371
300 / 371
100 / 433
200 / 433
300 / 433
400 / 433
100 / 321
200 / 321
300 / 321
100 / 428
200 / 428
300 / 428
400 / 428
100 / 221
200 / 221
100 / 347
200 / 347
300 / 347
100 / 379
200 / 379
300 / 379
100 / 461
200 / 461
300 / 461
400 / 461
100 / 436
200 / 436
300 / 436
400 / 436
100 / 266
200 / 266
100 / 326
200 / 326
300 / 326
100 / 278
200 / 278
100 / 191
Running for k=30...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)
  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 105
100 / 104
100 / 105
100 / 104
100 / 101


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 163
100 / 170
100 / 163
100 / 115
100 / 130
100 / 130
100 / 144
100 / 152
100 / 143
100 / 148
100 / 151
100 / 145


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 225
200 / 225
100 / 226
200 / 226
100 / 256
200 / 256
100 / 110
100 / 183
100 / 225
200 / 225
100 / 179
100 / 228
200 / 228
100 / 104
100 / 118
100 / 175
100 / 200
200 / 200
100 / 175
100 / 123
100 / 157
100 / 147
100 / 153
100 / 212
200 / 212
100 / 130
100 / 100
100 / 212
200 / 212
100 / 102
100 / 105


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 286
200 / 286
100 / 296
200 / 296
100 / 285
200 / 285
100 / 158
100 / 156
100 / 159
100 / 225
200 / 225
100 / 151
100 / 245
200 / 245
100 / 263
200 / 263
100 / 141
100 / 276
200 / 276
100 / 254
200 / 254
100 / 125
100 / 286
200 / 286
100 / 292
200 / 292
100 / 210
200 / 210
100 / 224
200 / 224
100 / 197
100 / 262
200 / 262
100 / 280
200 / 280
100 / 114
100 / 287
200 / 287
100 / 297
200 / 297
100 / 119


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 354
200 / 354
300 / 354
100 / 377
200 / 377
300 / 377
100 / 318
200 / 318
300 / 318
100 / 256
200 / 256
100 / 115
100 / 164
100 / 345
200 / 345
300 / 345
100 / 293
200 / 293
100 / 329
200 / 329
300 / 329
100 / 364
200 / 364
300 / 364
100 / 183
100 / 236
200 / 236
100 / 313
200 / 313
300 / 313
100 / 164
100 / 272
200 / 272
100 / 342
200 / 342
300 / 342
100 / 248
200 / 248
100 / 185
100 / 272
200 / 272
100 / 326
200 / 326
300 / 326
100 / 341
200 / 341
300 / 341
100 / 353
200 / 353
300 / 353
100 / 355
200 / 355
300 / 355
100 / 212
200 / 212
100 / 147
100 / 195
100 / 342
200 / 342
300 / 342
100 / 162


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


100 / 423
200 / 423
300 / 423
400 / 423
100 / 420
200 / 420
300 / 420
400 / 420
100 / 446
200 / 446
300 / 446
400 / 446
100 / 459
200 / 459
300 / 459
400 / 459
100 / 121
100 / 419
200 / 419
300 / 419
400 / 419
100 / 374
200 / 374
300 / 374
100 / 374
200 / 374
300 / 374
100 / 257
200 / 257
100 / 335
200 / 335
300 / 335
100 / 375
200 / 375
300 / 375
100 / 426
200 / 426
300 / 426
400 / 426
100 / 201
200 / 201
100 / 252
200 / 252
100 / 388
200 / 388
300 / 388
100 / 321
200 / 321
300 / 321
100 / 316
200 / 316
300 / 316
100 / 428
200 / 428
300 / 428
400 / 428
100 / 398
200 / 398
300 / 398
100 / 268
200 / 268
100 / 454
200 / 454
300 / 454
400 / 454
100 / 412
200 / 412
300 / 412
400 / 412
100 / 284
200 / 284
100 / 250
200 / 250
100 / 378
200 / 378
300 / 378
100 / 390
200 / 390
300 / 390
100 / 453
200 / 453
300 / 453
400 / 453
100 / 305
200 / 305
300 / 305
100 / 169
100 / 207
200 / 207

Sensitivity Analysis (Clustering Model):
------------------------------------------------------------
Fallbac

### User-based Collaborative Filtering

In [34]:
model_predictions = {}

In [77]:
predictions = predict(recommendations.sim_distance, KMeans)
model_predictions['Euclidean Distance'] = predictions

Running for k=15...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=1, silhouette=0.372


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=2, silhouette=0.443


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=3, silhouette=0.405


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=4, silhouette=0.357


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=5, silhouette=0.343


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=6, silhouette=0.354


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=7, silhouette=0.410


Mean silhouette for k = 15 EuclideanUser === 0.383

In [79]:
predictions = predict(recommendations.sim_jaccard, KMeans)
model_predictions['Jaccard Index'] = predictions

Running for k=15...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=1, silhouette=0.446


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=2, silhouette=0.446


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=3, silhouette=0.397


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=4, silhouette=0.480


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=5, silhouette=0.350


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=6, silhouette=0.341


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=7, silhouette=0.393


Mean silhouette for k = 15 JaccardUser === 0.408

In [81]:
predictions = predict(recommendations.sim_pearson, KMeans)
model_predictions['Pearson Correlation'] = predictions

Running for k=15...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=1, silhouette=0.505


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=2, silhouette=0.393


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=3, silhouette=0.397


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=4, silhouette=0.343


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=5, silhouette=0.436


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=6, silhouette=0.441


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=7, silhouette=0.387


Mean silhouette for k = 15 PearsonUser === 0.415

In [19]:
with open('clustering_user_based_collaborative_filtering_WE_results (Course based with KMeans).json', 'w') as fw:
    json.dump(model_predictions, fw)

### Item-based Collaborative Filtering

In [36]:
model_predictions = {}

In [85]:
predictions = predict(recommendations.sim_distance, KMeans, item_based=True)
model_predictions['Euclidean Distance'] = predictions

Running for k=15...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=1, silhouette=0.496


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=2, silhouette=0.439
100 / 105
100 / 104


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=3, silhouette=0.354
100 / 171
100 / 163
100 / 157
100 / 130
100 / 155
100 / 160
100 / 182
100 / 119
100 / 126
100 / 156


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=4, silhouette=0.331
100 / 234
200 / 234
100 / 226
200 / 226
100 / 198
100 / 131
100 / 102
100 / 224
200 / 224
100 / 303
200 / 303
300 / 303
100 / 226
200 / 226
100 / 241
200 / 241
100 / 104
100 / 175
100 / 222
200 / 222
100 / 186
100 / 212
200 / 212
100 / 102


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=5, silhouette=0.330
100 / 303
200 / 303
300 / 303
100 / 286
200 / 286
100 / 303
200 / 303
300 / 303
100 / 354
200 / 354
300 / 354
100 / 336
200 / 336
300 / 336
100 / 245
200 / 245
100 / 282
200 / 282
100 / 141
100 / 254
200 / 254
100 / 292
200 / 292
100 / 312
200 / 312
300 / 312
100 / 149
100 / 267
200 / 267
100 / 303
200 / 303
300 / 303


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=6, silhouette=0.343
100 / 377
200 / 377
300 / 377
100 / 354
200 / 354
300 / 354
100 / 394
200 / 394
300 / 394
100 / 428
200 / 428
300 / 428
400 / 428
100 / 140
100 / 409
200 / 409
300 / 409
400 / 409
100 / 306
200 / 306
300 / 306
100 / 365
200 / 365
300 / 365
100 / 175
100 / 357
200 / 357
300 / 357
100 / 314
200 / 314
300 / 314
100 / 280
200 / 280
100 / 321
200 / 321
300 / 321
100 / 360
200 / 360
300 / 360
100 / 154


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=7, silhouette=0.345
100 / 458
200 / 458
300 / 458
400 / 458
100 / 423
200 / 423
300 / 423
400 / 423
100 / 526
200 / 526
300 / 526
400 / 526
500 / 526
100 / 480
200 / 480
300 / 480
400 / 480
100 / 374
200 / 374
300 / 374
100 / 437
200 / 437
300 / 437
400 / 437
100 / 396
200 / 396
300 / 396
100 / 438
200 / 438
300 / 438
400 / 438
100 / 331
200 / 331
300 / 331
100 / 428
200 / 428
300 / 428
400 / 428
100 / 221
200 / 221
100 / 437
200 / 437
300 / 437
400 / 437
100 / 498
200 / 498
300 / 498
400 / 498
100 / 254
200 / 254
100 / 278
200 / 278


Mean silhouette for k = 15 EuclideanItem === 0.377

In [38]:
predictions = predict(recommendations.sim_jaccard, KMeans, item_based=True)
model_predictions['Jaccard Index'] = predictions

Running for k=15...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=1, silhouette=0.525


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=2, silhouette=0.413
100 / 105
100 / 105
100 / 109


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=3, silhouette=0.407
100 / 172
100 / 163
100 / 147
100 / 164
100 / 191
100 / 148
100 / 163
100 / 140
100 / 158
100 / 176


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=4, silhouette=0.353
100 / 235
200 / 235
100 / 226
200 / 226
100 / 132
100 / 132
100 / 198
100 / 261
200 / 261
100 / 179
100 / 231
200 / 231
100 / 104
100 / 223
200 / 223
100 / 149
100 / 177
100 / 219
200 / 219


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=5, silhouette=0.450
100 / 303
200 / 303
300 / 303
100 / 427
200 / 427
300 / 427
400 / 427
100 / 310
200 / 310
300 / 310
100 / 173
100 / 151
100 / 245
200 / 245
100 / 293
200 / 293
100 / 141
100 / 282
200 / 282
100 / 213
200 / 213
100 / 149
100 / 209
200 / 209
100 / 317
200 / 317
300 / 317
100 / 114
100 / 119


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=6, silhouette=0.419
100 / 354
200 / 354
300 / 354
100 / 396
200 / 396
300 / 396
100 / 486
200 / 486
300 / 486
400 / 486
100 / 436
200 / 436
300 / 436
400 / 436
100 / 306
200 / 306
300 / 306
100 / 315
200 / 315
300 / 315
100 / 175
100 / 364
200 / 364
300 / 364
100 / 338
200 / 338
300 / 338
100 / 314
200 / 314
300 / 314
100 / 262
200 / 262
100 / 272
200 / 272
100 / 361
200 / 361
300 / 361
100 / 303
200 / 303
300 / 303
100 / 190


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=7, silhouette=0.426
100 / 423
200 / 423
300 / 423
400 / 423
100 / 477
200 / 477
300 / 477
400 / 477
100 / 500
200 / 500
300 / 500
400 / 500
500 / 500
100 / 485
200 / 485
300 / 485
400 / 485
100 / 437
200 / 437
300 / 437
400 / 437
100 / 439
200 / 439
300 / 439
400 / 439
100 / 240
200 / 240
100 / 428
200 / 428
300 / 428
400 / 428
100 / 221
200 / 221
100 / 438
200 / 438
300 / 438
400 / 438
100 / 365
200 / 365
300 / 365
100 / 461
200 / 461
300 / 461
400 / 461
100 / 305
200 / 305
300 / 305
100 / 181
100 / 207
200 / 207


Mean silhouette for k = 15 jaccardItem === 0.428

In [40]:
predictions = predict(recommendations.sim_pearson, KMeans, item_based=True)
model_predictions['Pearson Correlation'] = predictions

Running for k=15...


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=1, silhouette=0.483


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=2, silhouette=0.456
100 / 105
100 / 105
100 / 104
100 / 104


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=3, silhouette=0.429
100 / 163
100 / 170
100 / 158
100 / 146
100 / 155
100 / 171
100 / 181
100 / 105
100 / 156


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=4, silhouette=0.466
100 / 235
200 / 235
100 / 132
100 / 203
200 / 203
100 / 102
100 / 264
200 / 264
100 / 182
100 / 231
200 / 231
100 / 104
100 / 164
100 / 175
100 / 214
200 / 214
100 / 119
100 / 222
200 / 222
100 / 201
200 / 201


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=5, silhouette=0.347
100 / 303
200 / 303
300 / 303
100 / 286
200 / 286
100 / 272
200 / 272
100 / 319
200 / 319
300 / 319
100 / 108
100 / 333
200 / 333
300 / 333
100 / 245
200 / 245
100 / 293
200 / 293
100 / 141
100 / 284
200 / 284
100 / 254
200 / 254
100 / 285
200 / 285
100 / 288
200 / 288
100 / 255
200 / 255
100 / 156


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=6, silhouette=0.388
100 / 381
200 / 381
300 / 381
100 / 354
200 / 354
300 / 354
100 / 507
200 / 507
300 / 507
400 / 507
500 / 507
100 / 458
200 / 458
300 / 458
400 / 458
100 / 330
200 / 330
300 / 330
100 / 315
200 / 315
300 / 315
100 / 183
100 / 316
200 / 316
300 / 316
100 / 363
200 / 363
300 / 363
100 / 160
100 / 380
200 / 380
300 / 380
100 / 376
200 / 376
300 / 376
100 / 265
200 / 265
100 / 147
100 / 162


  train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)


k=15, semester=7, silhouette=0.303
100 / 446
200 / 446
300 / 446
400 / 446
100 / 423
200 / 423
300 / 423
400 / 423
100 / 543
200 / 543
300 / 543
400 / 543
500 / 543
100 / 453
200 / 453
300 / 453
400 / 453
100 / 411
200 / 411
300 / 411
400 / 411
100 / 439
200 / 439
300 / 439
400 / 439
100 / 240
200 / 240
100 / 465
200 / 465
300 / 465
400 / 465
100 / 317
200 / 317
300 / 317
100 / 428
200 / 428
300 / 428
400 / 428
100 / 442
200 / 442
300 / 442
400 / 442
100 / 404
200 / 404
300 / 404
400 / 404
100 / 498
200 / 498
300 / 498
400 / 498
100 / 425
200 / 425
300 / 425
400 / 425
100 / 169


Mean silhouette for k = 15 pearsonItem === 0.410

In [None]:
with open('clustering_item_based_collaborative_filtering_results (Course based with KMeans).json', 'w') as fw:
    json.dump(model_predictions, fw)