In [1]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import recommendations
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, silhouette_samples

In [2]:
df = pd.read_csv('../datasets/SEHIR/processed_course_clustering_dataset.csv')
df = df[['Student Number', 'Course Code', 'Letter Grade', 'Semester', 'Course Credit', 'Course Year'] + list(df.columns[20:])]
df

Unnamed: 0,Student Number,Course Code,Letter Grade,Semester,Course Credit,Course Year,Subject,A+ rate,A rate,A- rate,...,C rate,C- rate,D+ rate,D rate,D- rate,F rate,Mean GPA - Students taken,Mean Grade - Students taken,STDEV GPA - Students taken,STDEV Grade - Students taken
0,240,UNI 111,F,2011 - Fall,3,1,UNI,0.013605,0.115646,0.149660,...,0.054422,0.040816,0.047619,0.020408,0.020408,0.088435,2.467279,2.680952,0.864273,1.193667
1,338,UNI 107,A,2011 - Fall,3,1,UNI,0.000000,0.318182,0.181818,...,0.090909,0.136364,0.045455,0.000000,0.045455,0.000000,2.893182,2.986364,0.821569,1.098612
2,338,UNI 105,A,2011 - Fall,3,1,UNI,0.115385,0.269231,0.038462,...,0.038462,0.000000,0.000000,0.000000,0.000000,0.038462,2.713846,3.211538,0.952080,0.937369
3,338,UNI 103,A,2011 - Fall,3,1,UNI,0.010929,0.153005,0.147541,...,0.081967,0.032787,0.000000,0.021858,0.027322,0.065574,2.483224,2.813115,0.977342,1.117219
4,240,UNI 105,A,2011 - Fall,3,1,UNI,0.115385,0.269231,0.038462,...,0.038462,0.000000,0.000000,0.000000,0.000000,0.038462,2.713846,3.211538,0.952080,0.937369
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48736,1102,MGT 531,A-,2014 - Spring,3,5,MGT,0.000000,0.237805,0.420732,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.416209,3.391758,0.579819,0.793752
48737,1102,MGT 585,A,2014 - Spring,3,5,MGT,0.000000,0.297521,0.314050,...,0.008264,0.000000,0.000000,0.000000,0.000000,0.000000,3.404921,3.440476,0.535670,0.751231
48738,1102,MGT 552,A,2014 - Spring,3,5,MGT,0.000000,0.354331,0.251969,...,0.055118,0.000000,0.000000,0.000000,0.000000,0.031496,3.449615,3.393077,0.454990,0.824827
48739,1984,MGT 574,A-,2014 - Spring,3,5,MGT,0.000000,0.393443,0.327869,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.352769,3.570769,0.467632,0.708811


In [3]:
df = pd.concat([df, pd.get_dummies(df['Subject'], prefix='Subject'), pd.get_dummies(df['Course Year'], prefix='Course Year')], axis=1)
df.drop(['Subject', 'Course Year'], axis=1, inplace=True)

In [4]:
df

Unnamed: 0,Student Number,Course Code,Letter Grade,Semester,Course Credit,A+ rate,A rate,A- rate,B+ rate,B rate,...,Subject_PSY,Subject_SOC,Subject_SPA,Subject_UNI,Course Year_1,Course Year_2,Course Year_3,Course Year_4,Course Year_5,Course Year_6
0,240,UNI 111,F,2011 - Fall,3,0.013605,0.115646,0.149660,0.163265,0.156463,...,False,False,False,True,True,False,False,False,False,False
1,338,UNI 107,A,2011 - Fall,3,0.000000,0.318182,0.181818,0.045455,0.090909,...,False,False,False,True,True,False,False,False,False,False
2,338,UNI 105,A,2011 - Fall,3,0.115385,0.269231,0.038462,0.153846,0.153846,...,False,False,False,True,True,False,False,False,False,False
3,338,UNI 103,A,2011 - Fall,3,0.010929,0.153005,0.147541,0.142077,0.196721,...,False,False,False,True,True,False,False,False,False,False
4,240,UNI 105,A,2011 - Fall,3,0.115385,0.269231,0.038462,0.153846,0.153846,...,False,False,False,True,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48736,1102,MGT 531,A-,2014 - Spring,3,0.000000,0.237805,0.420732,0.207317,0.134146,...,False,False,False,False,False,False,False,False,True,False
48737,1102,MGT 585,A,2014 - Spring,3,0.000000,0.297521,0.314050,0.223140,0.115702,...,False,False,False,False,False,False,False,False,True,False
48738,1102,MGT 552,A,2014 - Spring,3,0.000000,0.354331,0.251969,0.181102,0.070866,...,False,False,False,False,False,False,False,False,True,False
48739,1984,MGT 574,A-,2014 - Spring,3,0.000000,0.393443,0.327869,0.278689,0.000000,...,False,False,False,False,False,False,False,False,True,False


In [5]:
numerical_grades = {'A+': 4.1, 'A': 4.0, 'A-': 3.7, 'B+': 3.3, 'B': 3.0, 'B-': 2.7, 'C+': 2.3, 'C': 2.0,
                    'C-': 1.7, 'D+': 1.3, 'D': 1.0, 'D-': 0.5, 'F': 0.0}

In [6]:
course_credits = {}
for row_idx in df.index:
    course_code = df.iloc[row_idx, 1]
    credit = df.iloc[row_idx, 4]    
    course_credits[course_code] = credit

In [7]:
def get_semester_data(semester_name):
    semester_data = {}   # semester data in shape {student_number: {course_code: letter_grade, ...}, ...}
    
    # extracting the instances with the given semester_name from the main dataFrame
    dataset = df[df.iloc[:, 3] == semester_name]
    dataset.index = range(len(dataset))
    
    # filling the semester_data dictionary
    for row_idx in dataset.index:
        student_number = dataset.iloc[row_idx, 0]
        course_code = dataset.iloc[row_idx, 1]
        letter_grade = dataset.iloc[row_idx, 2]
        
        semester_data.setdefault(student_number, {})
        semester_data[student_number][course_code] = numerical_grades[letter_grade]
    
    return semester_data

In [8]:
def get_avg_gpa(train_semester, student):
    courses = train_semester[student]
    total_credit = 0
    weights = 0
    for course in courses:
        total_credit += course_credits[course]
        weights += courses[course] * course_credits[course]
    
    return weights / total_credit

In [9]:
def get_grade_stats(semester_data, student):
    grade_list = []
    
    for course in semester_data[student]:
        numerical_grade = semester_data[student][course]
        grade_list.append(numerical_grade)
    
    mean = np.mean(grade_list)
    std_dev = np.std(grade_list)
    
    return mean, std_dev

In [10]:
def fit_cluster(train_sems, num_clusters, training_data, cluster_model):
    train_dataset = pd.DataFrame(columns=df.columns)
    
    # extracting instances from the dataset which should be in training data
    for sem in train_sems:
        train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)

    cluster_features = train_dataset[list(train_dataset.columns[4:])]
    
    # fitting a clustering model based on GPA, Completed Credits and Departments
    fitted_cluster_model = cluster_model(n_clusters=num_clusters).fit(cluster_features)
    cluster_labels = fitted_cluster_model.labels_

    # ===== Silhouette metrics =====
    sil_score = silhouette_score(cluster_features, cluster_labels)

    sil_samples = silhouette_samples(cluster_features, cluster_labels)
    cluster_silhouette = {}

    for lbl in np.unique(cluster_labels):
        cluster_silhouette[lbl] = sil_samples[cluster_labels == lbl].mean()
    # ==============================

    
    cluster_dataset = {}   # splitting the train dataset into sub-dicts based on their predicted cluster label
    
    # assigning each students' data to their predicted clusters
    for i in range(len(cluster_labels)):
        cluster_dataset.setdefault(cluster_labels[i], {})
        student_number = train_dataset.iloc[i, 0]
        cluster_dataset[cluster_labels[i]][student_number] = training_data[student_number]
    
    return cluster_dataset, fitted_cluster_model, sil_score, cluster_silhouette

In [11]:
def cluster_test_data(cluster_model, semester_name):
    # extracting all instances with the given semester_name from the main dataFrame
    test_dataset = df[df.iloc[:, 3] == semester_name]
    test_dataset.index = range(len(test_dataset))
    
    # predicting the cluster labels of test data using a cluster model fitted on the train data so far
    cluster_features = test_dataset[list(test_dataset.columns[4:])]
    cluster_labels = cluster_model.predict(cluster_features)
    
    # getting the semester data of available students in test semester
    semester_data = get_semester_data(semester_name)
    
    cluster_dataset = {}   # splitting the test dataset into sub-dicts based on their predicted cluster label
    
    # assigning each students' data to their predicted clusters
    for i in range(len(cluster_labels)):
        cluster_dataset.setdefault(cluster_labels[i], {})
        student_number = test_dataset.iloc[i, 0]
        cluster_dataset[cluster_labels[i]][student_number] = semester_data[student_number]
        
    return cluster_dataset    

In [12]:
def get_course_stats(train_semester):
    # Eğitim verisindeki her dersin ortalamasını hesaplar
    course_grades = {}
    all_grades = []
    
    for student, courses in train_semester.items():
        for course, grade in courses.items():
            course_grades.setdefault(course, []).append(grade)
            all_grades.append(grade)
            
    course_means = {c: np.mean(g) for c, g in course_grades.items()}
    global_mean = np.mean(all_grades) if all_grades else 0
    
    return course_means, global_mean

### scarsity ratio 

In [22]:
train_dataset = pd.DataFrame(columns=df.columns)
sorted_semesters = sorted(set(df.iloc[:, 3]))
for sem in sorted_semesters:
        train_dataset = pd.concat([train_dataset, df[df.iloc[:, 3] == sem]], ignore_index=True)

scar = train_dataset[train_dataset.columns[4:]]
print(f"Scarsity = {scar.eq(False).sum().sum() / (scar.shape[0] * scar.shape[1])*100:.2f}%")

Scarsity = 70.81%


In [13]:
def get_errors(train_semester, test_semester, sim, item_based, sim_matrix=None, model_type='local'):
    average_gpa = {}
    y_true = []
    y_pred = []
    sources = []  # <--- YENİ: Kaynakları tutacak liste
    gpa = {}
    
    # 1. GPA Hazırlığı
    for student in train_semester:
        gpa[student] = get_avg_gpa(train_semester, student)
        average_gpa[student] = gpa[student]

    # 2. Benzerlik Matrisi
    current_sims = sim_matrix
    if item_based and current_sims is None:
        current_sims = recommendations.calculateSimilarItems(train_semester)

    # 3. Tahmin Döngüsü
    for student in train_semester:
        if student not in test_semester: continue

        # --- CF ÖNERİLERİ ---
        recommended_courses = {}
        if item_based:
            recs = recommendations.getRecommendedItems(train_semester, current_sims, student)
        else:
            recs = recommendations.getRecommendations(train_semester, student, sim, dgpa=True, gpa=gpa, delta=0.7)
        
        for rec_grade, rec_course in recs:
            recommended_courses[rec_course] = rec_grade
            
        mean, std_dev = get_grade_stats(train_semester, student)
            
        for course_code in test_semester[student]:
            final_pred = None
            source_tag = "" # <--- Etiket
            
            # DURUM 1: CF Tahmini (Local veya Global)
            if course_code in recommended_courses:
                final_pred = recommended_courses[course_code]
                source_tag = "CF" # (İstersen f"{model_type}_CF" diyerek Local/Global ayırabilirsin)
            
            # DURUM 2: Fallback (GPA)
            else:
                final_pred = average_gpa[student]
                source_tag = "Fallback"

            # Outlier Kontrolü
            if std_dev > 0:
                if final_pred < mean - (2 * std_dev) or final_pred > mean + (2 * std_dev):
                    continue
            
            y_pred.append(final_pred)
            y_true.append(test_semester[student][course_code])
            sources.append(source_tag) # <--- Kaynağı kaydet
            
    return y_true, y_pred, sources # <--- 3 değer döndür

In [25]:
def predict(sim, cluster_model, item_based=False, threshold=190):
    predictions = {}
    sorted_semesters = sorted(set(df.iloc[:, 3])) 
    
    for num_clusters in range(10, 31, 5):
        print(f"\n---> Running Adaptive Framework for k={num_clusters}...")
        predictions.setdefault(str(num_clusters), {})
        
        silhouette_list = []
        train_semester = {}
        
        for sem_idx in range(1, len(sorted_semesters)):
            # --- YENİ: sources listesi eklendi ---
            predictions[str(num_clusters)].setdefault(str(sem_idx), {'y_true': [], 'y_pred': [], 'sources': []})
            
            new_semester = get_semester_data(sorted_semesters[sem_idx-1])
            for student in new_semester:
                if student in train_semester:
                    train_semester[student].update(new_semester[student])
                else:
                    train_semester[student] = new_semester[student]
            
            # Phase 2: Global Sim
            global_sims = None
            if item_based:
                global_sims = recommendations.calculateSimilarItems(train_semester)
            
            # Phase 1: Clustering
            training_semesters_name = sorted_semesters[:sem_idx]
            train_cluster_data, fitted_cluster_model, sil_score, _ = fit_cluster(
                training_semesters_name, num_clusters, train_semester, cluster_model
            )
            
            test_semester_name = sorted_semesters[sem_idx]
            test_cluster_data = cluster_test_data(fitted_cluster_model, test_semester_name)
            
            # Phase 3: Adaptive Inference
            for cluster_label in train_cluster_data:
                if cluster_label not in test_cluster_data: continue
                
                cluster_size = len(train_cluster_data[cluster_label])
                
                # Değişkenleri hazırla
                current_y_true, current_y_pred, current_sources = [], [], []

                if cluster_size < threshold:
                    # GLOBAL MODEL (Sparse Cluster)
                    current_y_true, current_y_pred, current_sources = get_errors(
                        train_semester,          
                        test_cluster_data[cluster_label], 
                        sim, item_based, 
                        sim_matrix=global_sims,
                        model_type='global' # İstatistik için not düşüyoruz
                    )
                else:
                    # LOCAL MODEL (Dense Cluster)
                    current_y_true, current_y_pred, current_sources = get_errors(
                        train_cluster_data[cluster_label], 
                        test_cluster_data[cluster_label], 
                        sim, item_based, 
                        sim_matrix=None,
                        model_type='local'
                    )
                
                # --- SONUÇLARI BİRLEŞTİR ---
                predictions[str(num_clusters)][str(sem_idx)]['y_true'] += current_y_true
                predictions[str(num_clusters)][str(sem_idx)]['y_pred'] += current_y_pred
                predictions[str(num_clusters)][str(sem_idx)]['sources'] += current_sources # <--- EKLENDİ

            print(f"  Sem {sem_idx} Done. Silhouette Score: {sil_score:.3f}")

            silhouette_list.append(sil_score)

        print(f"\nMean Silhouette Scores for k={num_clusters}: {np.mean(silhouette_list):.3f}")

    return predictions

### User-based Collaborative Filtering

In [29]:
model_predictions = {}

In [30]:
predictions = predict(recommendations.sim_distance, KMeans)
model_predictions['Euclidean Distance'] = predictions


---> Running Adaptive Framework for k=10...
  Sem 1 Done. Silhouette Score: 0.457
  Sem 2 Done. Silhouette Score: 0.397
  Sem 3 Done. Silhouette Score: 0.352
  Sem 4 Done. Silhouette Score: 0.417
  Sem 5 Done. Silhouette Score: 0.400
  Sem 6 Done. Silhouette Score: 0.306
  Sem 7 Done. Silhouette Score: 0.368

Mean Silhouette Scores for k=10: 0.385

---> Running Adaptive Framework for k=15...
  Sem 1 Done. Silhouette Score: 0.484
  Sem 2 Done. Silhouette Score: 0.418
  Sem 3 Done. Silhouette Score: 0.407
  Sem 4 Done. Silhouette Score: 0.342
  Sem 5 Done. Silhouette Score: 0.323
  Sem 6 Done. Silhouette Score: 0.344
  Sem 7 Done. Silhouette Score: 0.340

Mean Silhouette Scores for k=15: 0.380

---> Running Adaptive Framework for k=20...
  Sem 1 Done. Silhouette Score: 0.507
  Sem 2 Done. Silhouette Score: 0.410
  Sem 3 Done. Silhouette Score: 0.360
  Sem 4 Done. Silhouette Score: 0.377
  Sem 5 Done. Silhouette Score: 0.355
  Sem 6 Done. Silhouette Score: 0.373
  Sem 7 Done. Silhouette 

In [31]:
predictions = predict(recommendations.sim_jaccard, KMeans)
model_predictions['Jaccard Index'] = predictions


---> Running Adaptive Framework for k=10...


  Sem 1 Done. Silhouette Score: 0.555
  Sem 2 Done. Silhouette Score: 0.369
  Sem 3 Done. Silhouette Score: 0.348
  Sem 4 Done. Silhouette Score: 0.424
  Sem 5 Done. Silhouette Score: 0.407
  Sem 6 Done. Silhouette Score: 0.372
  Sem 7 Done. Silhouette Score: 0.383

Mean Silhouette Scores for k=10: 0.408

---> Running Adaptive Framework for k=15...
  Sem 1 Done. Silhouette Score: 0.493
  Sem 2 Done. Silhouette Score: 0.436
  Sem 3 Done. Silhouette Score: 0.404
  Sem 4 Done. Silhouette Score: 0.361
  Sem 5 Done. Silhouette Score: 0.339
  Sem 6 Done. Silhouette Score: 0.352
  Sem 7 Done. Silhouette Score: 0.397

Mean Silhouette Scores for k=15: 0.397

---> Running Adaptive Framework for k=20...
  Sem 1 Done. Silhouette Score: 0.534
  Sem 2 Done. Silhouette Score: 0.359
  Sem 3 Done. Silhouette Score: 0.430
  Sem 4 Done. Silhouette Score: 0.404
  Sem 5 Done. Silhouette Score: 0.373
  Sem 6 Done. Silhouette Score: 0.473
  Sem 7 Done. Silhouette Score: 0.356

Mean Silhouette Scores for k=20

In [32]:
predictions = predict(recommendations.sim_pearson, KMeans)
model_predictions['Pearson Correlation'] = predictions


---> Running Adaptive Framework for k=10...
  Sem 1 Done. Silhouette Score: 0.491
  Sem 2 Done. Silhouette Score: 0.412
  Sem 3 Done. Silhouette Score: 0.369
  Sem 4 Done. Silhouette Score: 0.324
  Sem 5 Done. Silhouette Score: 0.390
  Sem 6 Done. Silhouette Score: 0.391
  Sem 7 Done. Silhouette Score: 0.336

Mean Silhouette Scores for k=10: 0.388

---> Running Adaptive Framework for k=15...
  Sem 1 Done. Silhouette Score: 0.486
  Sem 2 Done. Silhouette Score: 0.429
  Sem 3 Done. Silhouette Score: 0.398
  Sem 4 Done. Silhouette Score: 0.360
  Sem 5 Done. Silhouette Score: 0.444
  Sem 6 Done. Silhouette Score: 0.437
  Sem 7 Done. Silhouette Score: 0.315

Mean Silhouette Scores for k=15: 0.410

---> Running Adaptive Framework for k=20...
  Sem 1 Done. Silhouette Score: 0.537
  Sem 2 Done. Silhouette Score: 0.350
  Sem 3 Done. Silhouette Score: 0.401
  Sem 4 Done. Silhouette Score: 0.352
  Sem 5 Done. Silhouette Score: 0.373
  Sem 6 Done. Silhouette Score: 0.385
  Sem 7 Done. Silhouette 

In [33]:
with open('clustering_user_based_collaborative_filtering_results (Course based with KMeans).json', 'w') as fw:
    json.dump(model_predictions, fw)

### Item-based Collaborative Filtering

In [34]:
model_predictions = {}

In [35]:
predictions = predict(recommendations.sim_distance, KMeans, item_based=True)
model_predictions['Euclidean Distance'] = predictions


---> Running Adaptive Framework for k=10...
  Sem 1 Done. Silhouette Score: 0.572
  Sem 2 Done. Silhouette Score: 0.412
  Sem 3 Done. Silhouette Score: 0.362
  Sem 4 Done. Silhouette Score: 0.436
  Sem 5 Done. Silhouette Score: 0.307
  Sem 6 Done. Silhouette Score: 0.399
  Sem 7 Done. Silhouette Score: 0.377

Mean Silhouette Scores for k=10: 0.409

---> Running Adaptive Framework for k=15...
  Sem 1 Done. Silhouette Score: 0.533
  Sem 2 Done. Silhouette Score: 0.381
  Sem 3 Done. Silhouette Score: 0.405
  Sem 4 Done. Silhouette Score: 0.334
  Sem 5 Done. Silhouette Score: 0.316
  Sem 6 Done. Silhouette Score: 0.342
  Sem 7 Done. Silhouette Score: 0.353

Mean Silhouette Scores for k=15: 0.381

---> Running Adaptive Framework for k=20...
  Sem 1 Done. Silhouette Score: 0.527
  Sem 2 Done. Silhouette Score: 0.383
  Sem 3 Done. Silhouette Score: 0.375
  Sem 4 Done. Silhouette Score: 0.404
  Sem 5 Done. Silhouette Score: 0.482
  Sem 6 Done. Silhouette Score: 0.372
  Sem 7 Done. Silhouette 

In [36]:
predictions = predict(recommendations.sim_jaccard, KMeans, item_based=True)
model_predictions['Jaccard Index'] = predictions


---> Running Adaptive Framework for k=10...
  Sem 1 Done. Silhouette Score: 0.479
  Sem 2 Done. Silhouette Score: 0.390
  Sem 3 Done. Silhouette Score: 0.361
  Sem 4 Done. Silhouette Score: 0.437
  Sem 5 Done. Silhouette Score: 0.295
  Sem 6 Done. Silhouette Score: 0.286
  Sem 7 Done. Silhouette Score: 0.351

Mean Silhouette Scores for k=10: 0.371

---> Running Adaptive Framework for k=15...
  Sem 1 Done. Silhouette Score: 0.506
  Sem 2 Done. Silhouette Score: 0.321
  Sem 3 Done. Silhouette Score: 0.392
  Sem 4 Done. Silhouette Score: 0.351
  Sem 5 Done. Silhouette Score: 0.424
  Sem 6 Done. Silhouette Score: 0.320
  Sem 7 Done. Silhouette Score: 0.411

Mean Silhouette Scores for k=15: 0.389

---> Running Adaptive Framework for k=20...
  Sem 1 Done. Silhouette Score: 0.561
  Sem 2 Done. Silhouette Score: 0.417
  Sem 3 Done. Silhouette Score: 0.440
  Sem 4 Done. Silhouette Score: 0.365
  Sem 5 Done. Silhouette Score: 0.370
  Sem 6 Done. Silhouette Score: 0.469
  Sem 7 Done. Silhouette 

In [37]:
predictions = predict(recommendations.sim_pearson, KMeans, item_based=True)
model_predictions['Pearson Correlation'] = predictions


---> Running Adaptive Framework for k=10...
  Sem 1 Done. Silhouette Score: 0.436
  Sem 2 Done. Silhouette Score: 0.412
  Sem 3 Done. Silhouette Score: 0.467
  Sem 4 Done. Silhouette Score: 0.445
  Sem 5 Done. Silhouette Score: 0.431
  Sem 6 Done. Silhouette Score: 0.380
  Sem 7 Done. Silhouette Score: 0.351

Mean Silhouette Scores for k=10: 0.417

---> Running Adaptive Framework for k=15...
  Sem 1 Done. Silhouette Score: 0.465
  Sem 2 Done. Silhouette Score: 0.435
  Sem 3 Done. Silhouette Score: 0.396
  Sem 4 Done. Silhouette Score: 0.356
  Sem 5 Done. Silhouette Score: 0.459
  Sem 6 Done. Silhouette Score: 0.427
  Sem 7 Done. Silhouette Score: 0.339

Mean Silhouette Scores for k=15: 0.411

---> Running Adaptive Framework for k=20...
  Sem 1 Done. Silhouette Score: 0.536
  Sem 2 Done. Silhouette Score: 0.437
  Sem 3 Done. Silhouette Score: 0.332
  Sem 4 Done. Silhouette Score: 0.393
  Sem 5 Done. Silhouette Score: 0.381
  Sem 6 Done. Silhouette Score: 0.480
  Sem 7 Done. Silhouette 

In [38]:
with open('clustering_item_based_collaborative_filtering_results (Course based with KMeans).json', 'w') as fw:
    json.dump(model_predictions, fw)