<a href="https://colab.research.google.com/github/HaibaraAi-afk/Capstone/blob/main/Modeling_KNeighbors_FinPockets_TF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import FuncFormatter
import tensorflow as tf
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV,cross_validate
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor

In [None]:
# Load the dataset from the project's GitHub repository and display it.
# NOTE(review): the URL carries a `token=` query parameter, which looks like a
# GitHub access token committed in plain text — confirm, revoke/rotate it, and
# read the credential from a secret or environment variable instead. If it is
# a short-lived token, this cell will also break once it expires.
df = pd.read_csv('https://raw.githubusercontent.com/HaibaraAi-afk/Capstone/refs/heads/main/Dataset/converted_data.csv?token=GHSAT0AAAAAACX26274XJYSFK23IZKQHISYZZZN7FA')
df

# Data Preprocessing

In [None]:
# Preview the first rows of the dataset.
df.head()

In [None]:
# Count the missing values in each column.
df.isnull().sum()

In [None]:
# Print the DataFrame summary produced by pandas (df.info()).
df.info()

In [None]:
# Remove the Occupation and City_Tier columns from the working frame.
columns_to_drop = ["Occupation", "City_Tier"]
df = df.drop(columns=columns_to_drop)

In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Plot against string labels so the x axis is categorical, but keep the labels
# in a local Series: overwriting df['Dependents'] would turn a numeric feature
# into text for every later cell (feature selection, scaling, model fitting).
dependents_labels = df['Dependents'].astype(str)


# Formatter for the Y axis ticks
def format_func(value, tick_number):
    """Format a y-axis tick value in millions ("Jt" = juta).

    Parameters:
    - value: Raw tick value, in the same unit as the Income column.
    - tick_number: Tick position supplied by matplotlib (unused).

    Returns:
    - The value divided by one million, with one decimal and a " Jt" suffix.
    """
    # 1 juta = 1e6; dividing by 1e7 would understate every label tenfold.
    return f'{value/1e6:.1f} Jt'


plt.figure(figsize=(10, 6))
plt.scatter(dependents_labels, df['Income'], color='blue', alpha=0.5)  # Scatter plot
plt.ylabel('Income (dalam juta)', fontsize=12)
plt.xlabel('Jumlah Dependents', fontsize=12)
plt.title('Income berdasarkan Jumlah Dependents', fontsize=14)

# Apply the millions formatter to the Y axis
plt.gca().yaxis.set_major_formatter(FuncFormatter(format_func))

# Put one X tick on every distinct Dependents value
plt.xticks(dependents_labels.unique())
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Feature Engineering
# NOTE(review): neither derived column is included in the model features X
# selected later in this notebook (only Income, Age and Dependents are) —
# confirm whether they are meant to be used.
df['Log_Income'] = np.log1p(df['Income'])  # Logarithmic transform: log(1 + Income)
df['Income_Age_Interaction'] = df['Income'] * df['Age']  # Interaction feature

In [None]:
# Show the smallest and largest Income values.
print(df['Income'].min(), df['Income'].max())

In [None]:
# Select the input features (X) and the multi-column prediction target (y)
X = df[['Income', 'Age', 'Dependents']]
y = df[['Bills', 'Groceries', 'Transport', 'Entertainment', 'Healthcare', 'Education', 'Utilities', 'Disposable_Income']]

In [None]:
# Hold out 20% of the rows as a test set; random_state=42 fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
def cross_val(model, features=None, targets=None, cv=10):
    """Return the mean cross-validation score of ``model``.

    Parameters:
    - model: Estimator handed to ``cross_val_score``.
    - features: Feature matrix to validate on. Defaults to the notebook-level
      ``X`` (the raw, unscaled features).
    - targets: Target values. Defaults to the notebook-level ``y``.
    - cv: Number of folds (or a CV splitter) forwarded to
      ``cross_val_score``. Defaults to 10.

    Returns:
    - The mean of the per-fold scores.

    Note: with the defaults the estimator is validated on unscaled data; pass
    scaled features or a pipeline that includes the scaler when the model was
    trained on standardized inputs.
    """
    # Fall back to the notebook globals so existing cross_val(model) calls
    # keep working unchanged.
    if features is None:
        features = X
    if targets is None:
        targets = y
    fold_scores = cross_val_score(model, features, targets, cv=cv)
    return fold_scores.mean()

# Model evaluation
def print_evaluate(y_true, y_pred, set_name):
    """Print MAE, MSE, RMSE and R² for one dataset split.

    Parameters:
    - y_true: Ground-truth target values.
    - y_pred: Predicted target values.
    - set_name: Label printed in the report header (e.g. "Test Set").
    """
    squared_error = mean_squared_error(y_true, y_pred)
    report_lines = [
        f"{set_name} Evaluation:",
        f"MAE: {mean_absolute_error(y_true, y_pred):.2f}",
        f"MSE: {squared_error:.2f}",
        f"RMSE: {np.sqrt(squared_error):.2f}",
        f"R²: {r2_score(y_true, y_pred):.4f}",
        "-" * 30,
    ]
    # One print of the joined lines emits the same text as six separate prints.
    print("\n".join(report_lines))



def evaluate(true, predicted):
    """Compute the regression metrics for a set of predictions.

    Parameters:
    - true: Ground-truth target values.
    - predicted: Predicted target values.

    Returns:
    - Tuple ``(mae, mse, rmse, r2)``; RMSE is the square root of the MSE.
    """
    squared_error = metrics.mean_squared_error(true, predicted)
    return (
        metrics.mean_absolute_error(true, predicted),
        squared_error,
        np.sqrt(squared_error),
        metrics.r2_score(true, predicted),
    )

In [None]:
# Fit the scaler on the training features only, then apply that same fitted
# transform to the test features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
def add_model_results(results_df, model_name, y_true, y_pred, best_params, model):
    """
    Append one model's evaluation results to a results DataFrame.

    Parameters:
    - results_df: Existing DataFrame that collects the results.
    - model_name: Name of the model being recorded.
    - y_true: Ground-truth target values.
    - y_pred: Values predicted by the model.
    - best_params: Best parameters found for the model.
    - model: Estimator passed to cross_val() for cross-validation.

    Returns:
    - A new DataFrame: results_df with the new row appended.
    """
    column_names = ['Model', 'MAE', 'MSE', 'RMSE', 'R2 Square', 'Best Parameters', 'Cross_Val']

    # Hold-out metrics first, then the cross-validated score
    error_metrics = evaluate(y_true, y_pred)
    cv_mean = cross_val(model)

    # Single-row frame for this model
    new_row = pd.DataFrame(
        data=[[model_name, *error_metrics, best_params, cv_mean]],
        columns=column_names,
    )

    # Append it to the existing results with a fresh index
    return pd.concat([results_df, new_row], ignore_index=True)

# EDA

In [None]:
# Histogram of Income expressed in millions, matching the axis label
# ("dalam juta" = "in millions"): 1 juta = 1e6, so the divisor is 1e6.
plt.figure(figsize=(10, 6))
plt.hist(df['Income'] / 1e6, bins=30, color='Blue', alpha=0.5)
plt.xlabel('Income (dalam juta)', fontsize=12)
plt.ylabel('Frekuensi', fontsize=12)
plt.title('Distribusi Income', fontsize=14)
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between the columns of df.
plt.figure(figsize=(12,8))
sns.heatmap(df.corr(), annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Korelasi Antar Fitur')
plt.show()

In [None]:
# Visualise outliers with one boxplot per monetary column
plt.figure(figsize=(12, 6))
sns.boxplot(data=df[['Income', 'Bills', 'Groceries', 'Transport', 'Entertainment', 'Healthcare', 'Education', 'Utilities', 'Disposable_Income']])
plt.xticks(rotation=45)  # Rotate the column names so they do not overlap
plt.show()

# Modeling KNeighbors


In [None]:
# Define the parameter grid
param_grid = {
    'n_neighbors': [3, 5, 7, 9],               # Number of neighbours
    'weights': ['uniform', 'distance'],        # Neighbour weighting scheme
    'p': [1, 2]                                # Distance: 1 = Manhattan, 2 = Euclidean
}

# Initialise the KNeighborsRegressor model
knn = KNeighborsRegressor()

# Grid search
# 'accuracy' is a classification metric and cannot score continuous,
# multi-output targets: every candidate would fail to score and the search
# could not rank them. R² is a regression metric and is also the score that
# cross_val_score uses for regressors by default.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=5,              # 5-fold cross-validation
    n_jobs=-1,         # Use all CPU cores
    scoring='r2'       # Regression metric (higher is better)
)

# Fit on the standardized training features
grid_search.fit(X_train_scaled, y_train)

# Keep the best estimator (refit on the whole training set by GridSearchCV)
best_knn = grid_search.best_estimator_

model = best_knn

In [None]:
# Print the best parameters found by the grid search
print("Best Parameters:", grid_search.best_params_)

In [None]:
# Predict on the standardized training and test features with the tuned model.
train_pred = best_knn.predict(X_train_scaled)
test_pred = best_knn.predict(X_test_scaled)

In [None]:
# Model evaluation: report the metrics for the test split, then the train split
print('Test set evaluation:')
print_evaluate(y_test, test_pred, set_name='Test Set')
print('Train set evaluation:')
print_evaluate(y_train, train_pred, set_name='Train Set')

In [None]:
# Summarise the tuned KNN in a one-row results table.
# The model was tuned and tested on standardized features, whereas cross_val()
# validates the estimator it is given on the raw X. Wrapping the model in a
# StandardScaler pipeline makes each fold apply the same preprocessing (fitted
# on that fold's training part only), so Cross_Val is comparable with the
# other metrics in the row.
from sklearn.pipeline import make_pipeline

results_df = pd.DataFrame(data=[
    ["KNeighbors Regressor",
     *evaluate(y_test, test_pred),
     grid_search.best_params_,
     cross_val(make_pipeline(StandardScaler(), model))]
],
columns=['Model', 'MAE', 'MSE', 'RMSE', 'R2 Square', 'Best Parameters', 'Cross_Val'])

In [None]:
# Display the results table.
results_df

# Testing

In [None]:
def test_model(model, scaler):
    """Prompt for Income, Age and Dependents and predict from those values.

    Parameters:
    - model: Fitted estimator exposing ``predict``.
    - scaler: Fitted transformer exposing ``transform``; applied to the input
      before prediction.

    Returns:
    - The output of ``model.predict`` for the single scaled input row.
    """
    # Read the three raw feature values from the user, in column order
    answers = [
        float(input("Masukkan Income: ")),
        float(input("Masukkan Age: ")),
        int(input("Masukkan Jumlah Dependents: ")),
    ]

    # One-row frame using the feature names the scaler was fitted with
    frame = pd.DataFrame([answers], columns=['Income', 'Age', 'Dependents'])

    # Standardize, then predict
    return model.predict(scaler.transform(frame))

In [None]:
# Try the tuned model on values typed in by the user
predicted_output = test_model(best_knn, scaler)

# Show the prediction
print("Hasil Prediksi:")
print(predicted_output)

# KNeighbors TF

In [None]:
# Initialise the model name
# NOTE(review): model_name and export_path are both reassigned in the Deploy
# cell before anything is saved, so these values appear to be unused — confirm.
model_name = "kneighbors_tf_model"  # The model name can be changed as needed
export_path = f"./{model_name}"  # Directory where the model would be stored


In [None]:
# Step 1: Train a KNeighborsRegressor
# NOTE(review): this fits on the full, unscaled X/y with hard-coded example
# parameters rather than grid_search.best_params_ and the standardized
# features used for best_knn — confirm that this is intended.
knn = KNeighborsRegressor(n_neighbors=3, weights="uniform", p=1)  # Example parameters
knn.fit(X, y)

# Step 2: Use the KNeighborsRegressor to produce predictions for the same rows
y_pred_knn = knn.predict(X)

# Step 3: Define the TensorFlow model
tf_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X.shape[1],)),  # Input width equals the number of features
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(y.shape[1])  # Output width equals the number of targets
])

# Compile the model
tf_model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# Step 4: Train the TensorFlow model on X, using the KNeighbors predictions
# (not the true y) as the training target
tf_model.fit(X, y_pred_knn, epochs=50, batch_size=32, verbose=1)


In [None]:
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

def evaluate_model(model, X_train, X_test, y_train, y_test):
    """
    Score a model on the training and test sets and tabulate the metrics.

    Parameters:
    - model: Model to evaluate; must expose ``predict``.
    - X_train, X_test: Training and test features.
    - y_train, y_test: Training and test targets.

    Returns:
    - DataFrame with one row per dataset ("Training", "Test") and the columns
      MAE, MSE, RMSE and R².
    """
    def _score(y_true, y_hat):
        # All four metrics for one split; RMSE is derived from the MSE.
        squared_error = mean_squared_error(y_true, y_hat)
        return {
            "MAE": mean_absolute_error(y_true, y_hat),
            "MSE": squared_error,
            "RMSE": np.sqrt(squared_error),
            "R²": r2_score(y_true, y_hat),
        }

    # Predict both splits first, then score them
    fitted_train = model.predict(X_train)
    fitted_test = model.predict(X_test)
    train_scores = _score(y_train, fitted_train)
    test_scores = _score(y_test, fitted_test)

    # Assemble the table column by column: training value first, test second
    table = {"Dataset": ["Training", "Test"]}
    for metric_name in ("MAE", "MSE", "RMSE", "R²"):
        table[metric_name] = [train_scores[metric_name], test_scores[metric_name]]

    return pd.DataFrame(table)

# Example usage
# NOTE(review): this evaluates best_knn (the grid-searched model on
# standardized features), not tf_model, although the cell sits in the
# "KNeighbors TF" section — confirm that this is intended.
evaluation_table = evaluate_model(best_knn, X_train_scaled, X_test_scaled, y_train, y_test)
print(evaluation_table)


# Testing

In [None]:
# Predict from user-supplied values with both the KNeighbors and TensorFlow models
def predict_user_input(income, age, dependents):
    """Return the KNeighbors and TensorFlow predictions for one input row.

    Parameters:
    - income: Value for the 'Income' feature.
    - age: Value for the 'Age' feature.
    - dependents: Value for the 'Dependents' feature.

    Returns:
    - Tuple ``(knn_prediction, tf_prediction)`` from the notebook-level
      ``knn`` and ``tf_model``.
    """
    feature_names = ('Income', 'Age', 'Dependents')
    feature_values = (income, age, dependents)

    # Single-row frame: each column holds a one-element list
    frame = pd.DataFrame(
        {name: [value] for name, value in zip(feature_names, feature_values)}
    )

    # KNeighbors first, then the TensorFlow model, on the same raw input
    return knn.predict(frame), tf_model.predict(frame)


In [None]:
# Read the input values from the user
income = float(input("Masukkan pendapatan: "))
age = int(input("Masukkan usia: "))
dependents = int(input("Masukkan jumlah tanggungan: "))

# Get the predictions from both models
knn_pred, tf_pred = predict_user_input(income, age, dependents)

# Show the predictions
print("Prediksi dari model KNeighbors:", knn_pred)
print("Prediksi dari model TensorFlow:", tf_pred)

# Deploy

In [None]:
# Save model in Keras native format
model_name = "tf_model_final.keras"  # Use .keras extension
export_path = f"./{model_name}"  # Saved in the current working directory
tf_model.save(export_path)
print(f"Model saved to: {export_path}")

# Convert to TensorFlow Lite (from the in-memory tf_model, not the saved file)
converter = tf.lite.TFLiteConverter.from_keras_model(tf_model)
tflite_model = converter.convert()

# Save TFLite model: the converter output is written to disk in binary mode
tflite_path = "tf_model.tflite"
with open(tflite_path, "wb") as f:
    f.write(tflite_model)

print(f"TFLite model saved to: {tflite_path}")