# Scholarship Recommendation System: Grant Me App



Import the **Libraries**

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib as plt
import pathlib

Read the **Dataset**

In [3]:
# Location of the merged scholarship dataset (raw CSV hosted on GitHub)
url = 'https://raw.githubusercontent.com/qannisa/dummydataset/main/v2merged2.csv'
# Load the CSV into a dataframe using pandas' default parsing options
data = pd.read_csv(filepath_or_buffer=url)

In [4]:
# Display the loaded dataframe as the cell output (pandas truncates long frames
# to the first and last rows, as seen in the rendered table below)
data

Unnamed: 0,Volunteers,Internships,National_Honor,National_Top3,International_Honor,International_Top3,Certification,Professional_Certification,Scholarship_Type,Scholarship_Name,GPA
0,41,21,0,2,0,0,0,1,[1],['Beasiswa Baznas'],3.30
1,40,15,1,2,0,0,0,0,[1],['Beasiswa Baznas'],3.60
2,31,20,3,2,0,0,0,1,[1],['Beasiswa Baznas'],3.17
3,34,29,3,1,0,0,0,1,[1],['Beasiswa Baznas'],3.91
4,40,33,2,0,0,0,0,1,[1],['Beasiswa Baznas'],3.37
...,...,...,...,...,...,...,...,...,...,...,...
14009,16,4,0,0,1,1,2,1,[1],['Beasiswa XL Future Leaders'],2.85
14010,9,1,1,0,1,1,1,0,[1],['Beasiswa XL Future Leaders'],3.40
14011,19,12,5,0,2,2,1,0,[1],['Beasiswa XL Future Leaders'],3.07
14012,13,4,5,1,5,5,2,3,[1],['Beasiswa XL Future Leaders'],3.24


In [6]:
# Show the column labels as a plain Python list to check how the header was parsed
print(list(data.columns))

['Column1;Volunteers;Internships;National_Honor;National_Top3;International_Honor;International_Top3;Certification;Professional_Certification;Scholarships;Scholarship_Type;Scholarship_Name;GPA']


In [12]:
# When the ';'-delimited file is read with read_csv's default ',' separator, every
# row arrives as one string in a single column whose name is the whole header line.
raw_column = 'Column1;Volunteers;Internships;National_Honor;National_Top3;International_Honor;International_Top3;Certification;Professional_Certification;Scholarships;Scholarship_Type;Scholarship_Name;GPA'

# Numeric columns used as clustering features. The order defined here is the
# order in which the scaler and the models downstream see the features.
feature_columns = ['Volunteers', 'Internships', 'National_Honor', 'National_Top3',
                   'International_Honor', 'International_Top3', 'Certification',
                   'Professional_Certification', 'GPA']

# Separate the columns using semicolons, then drop the combined column.
# Guarded so the cell can be re-run: after the first run the combined column is gone.
if raw_column in data.columns:
    data[raw_column.split(';')] = data[raw_column].str.split(';', expand=True)
    data = data.drop(columns=[raw_column])

# Replace empty string values with NaN so they can be imputed below
data = data.replace('', np.nan)

# The split produces strings; convert the feature columns to float
data[feature_columns] = data[feature_columns].astype(float)

# Replace NaN values with the column mean. numeric_only=True is required because
# the frame still contains string columns (e.g. Scholarship_Name): without it
# pandas 1.5 emits a FutureWarning and pandas >= 2.0 raises a TypeError.
data = data.fillna(data.mean(numeric_only=True))

# Get the columns to be used for clustering
X = data[feature_columns]

  data.fillna(data.mean(), inplace=True)


In [13]:
print(X)

       Volunteers  Internships  National_Honor  National_Top3  \
0            41.0         21.0             0.0            2.0   
1            40.0         15.0             1.0            2.0   
2            31.0         20.0             3.0            2.0   
3            34.0         29.0             3.0            1.0   
4            40.0         33.0             2.0            0.0   
...           ...          ...             ...            ...   
14009        16.0          4.0             0.0            0.0   
14010         9.0          1.0             1.0            0.0   
14011        19.0         12.0             5.0            0.0   
14012        13.0          4.0             5.0            1.0   
14013        10.0         10.0             3.0            1.0   

       International_Honor  International_Top3  Certification  \
0                      0.0                 0.0            0.0   
1                      0.0                 0.0            0.0   
2                      0

Build the **Clustering Model** and Train the Classifier

In [16]:
# Import the required libraries for model training and evaluation
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Number of clusters. It must equal the width of the classifier's softmax layer,
# so it is defined once here and used for both.
N_CLUSTERS = 5
# Single seed shared by K-Means, the train/test split and TensorFlow
SEED = 42

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and batch
# shuffling (and therefore the reported metrics) are repeatable across runs.
tf.keras.utils.set_random_seed(SEED)

# Standardize the data using StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create a k-means model with the desired number of clusters
kmeans = KMeans(n_clusters=N_CLUSTERS, random_state=SEED)

# Perform clustering on the data
kmeans.fit(X_scaled)

# Add the clustering result column to the dataframe
data['Cluster'] = kmeans.labels_

# Display the clustering result
print(data['Cluster'])

# Split the data into training and testing sets.
# The targets are the K-Means labels, so the network below learns to reproduce
# the cluster assignment from the standardized features.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, data['Cluster'], test_size=0.2, random_state=SEED
)

# Define the TensorFlow model: three hidden ReLU layers and one softmax unit per cluster
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(N_CLUSTERS, activation='softmax')
])

# Compile the model (sparse loss because the labels are integer cluster ids)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model.
# NOTE: the test split is also used as validation data, so the per-epoch
# validation metrics and the final evaluation below are computed on the same rows.
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print('Test Loss:', test_loss)
print('Test Accuracy:', test_accuracy)



0        2
1        2
2        2
3        2
4        2
        ..
14009    0
14010    0
14011    3
14012    3
14013    0
Name: Cluster, Length: 14014, dtype: int32
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.035183753818273544
Test Accuracy: 0.9857295751571655


Test the Clustering Model with a **Sample Input** and **Plot the Data**

In [15]:
# New input to be predicted (replace with the appropriate values).
# The values are keyed by feature name so each one lands in the column the scaler
# was fitted on; the previous positional array put 3.8 in the Volunteers slot and
# 13 in the GPA slot.
# NOTE(review): the non-GPA values below assume the old array was written in
# reverse column order - confirm the intended sample profile.
new_profile = {
    'Volunteers': 13,
    'Internships': 14,
    'National_Honor': 1,
    'National_Top3': 1,
    'International_Honor': 1,
    'International_Top3': 2,
    'Certification': 3,
    'Professional_Certification': 5,
    'GPA': 3.8,
}

# Build a one-row frame with the same columns, in the same order, as the training
# features. Passing a DataFrame also avoids scikit-learn's "X does not have valid
# feature names" warning, since the scaler was fitted on a DataFrame.
new_input = pd.DataFrame([new_profile], columns=X.columns, dtype=float)

# Standardize the new input with the scaler fitted on the training features
new_input_scaled = scaler.transform(new_input)

# Predict the cluster
predicted_cluster = kmeans.predict(new_input_scaled)

# Display the prediction result
print("Inputan masuk ke cluster:", predicted_cluster)

# Human-readable scholarship category for each cluster id.
# NOTE(review): this id -> category mapping is hard-coded; verify it against the
# actual cluster contents whenever K-Means is refitted.
cluster_names = {
    0: "Pemerintah",
    1: "Swasta",
    2: "Organisasi",
    3: "Prestasi",
    4: "Bantuan",
}

# predict() returns a one-element array; take the scalar id for the lookup
cluster_name = cluster_names.get(int(predicted_cluster[0]))
if cluster_name is not None:
    print(cluster_name)

Inputan masuk ke cluster: [4]
Bantuan




Save the **Model**

In [17]:
# Save the trained Keras model to the file 'model.h5' (path is relative to the
# notebook's working directory)
model.save('model.h5')

  saving_api.save_model(


Export the Model in **SavedModel** Format

In [None]:
# Directory that will receive the exported SavedModel
export_dir = 'saved_model/1'
# Export the trained model in TensorFlow's SavedModel format
tf.saved_model.save(obj=model, export_dir=export_dir)

INFO:tensorflow:Assets written to: saved_model/1\assets


INFO:tensorflow:Assets written to: saved_model/1\assets


Convert the SavedModel to **TFLite**

In [None]:
# Convert the model.
# Build a TFLite converter from the SavedModel directory stored in `export_dir`
converter = tf.lite.TFLiteConverter.from_saved_model(export_dir)
# Run the conversion; the result is written to disk with write_bytes in the next cell
tflite_model = converter.convert()

In [None]:
# Target file for the converted model (relative to the working directory)
tflite_model_file = pathlib.Path('model.tflite')
# write_bytes returns the number of bytes written; as the cell's last expression
# that count is what the notebook displays as the output
tflite_model_file.write_bytes(tflite_model)

15864