In [5]:
# Import library
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer  # Untuk menangani NaN
from sklearn.metrics import classification_report, accuracy_score
import joblib  # Untuk menyimpan model

# Load dataset
# Pastikan file Anda bernama "data.csv" atau sesuaikan dengan nama file Anda
df = pd.read_csv('Telco-Customer-Churn.csv')

# Pilih kolom yang relevan
selected_features = ['tenure', 'Contract', 'MonthlyCharges', 'PaperlessBilling', 'PaymentMethod']
target = 'Churn'

# Ubah target menjadi numerik
df['Churn'] = df['Churn'].apply(lambda x: 1 if x == 'Yes' else 0)

# Pisahkan fitur dan target
X = df[selected_features]
y = df[target]

# Tangani missing values (NaN) di dataset
# Imputasi untuk data numerik dengan mean, dan kategoris dengan 'most_frequent'
numerical_cols = ['tenure', 'MonthlyCharges']
categorical_cols = ['Contract', 'PaperlessBilling', 'PaymentMethod']

numerical_imputer = SimpleImputer(strategy='mean')  # Mengisi NaN numerik dengan mean
categorical_imputer = SimpleImputer(strategy='most_frequent')  # Mengisi NaN kategoris dengan nilai paling sering

# Terapkan imputasi pada kolom numerik
for col in numerical_cols:
    X[col] = numerical_imputer.fit_transform(X[[col]]).ravel()  # Gunakan .ravel() untuk membuat array 1D

# Terapkan imputasi pada kolom kategoris
for col in categorical_cols:
    X[col] = categorical_imputer.fit_transform(X[[col]]).ravel()

# Preprocessing pipeline
numerical_transformer = StandardScaler()  # Standarisasi untuk fitur numerik
categorical_transformer = OneHotEncoder(handle_unknown='ignore')  # One-hot encoding untuk fitur kategoris

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ]
)

# Buat pipeline dengan Logistic Regression
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(random_state=42))
])

# Split data menjadi train dan test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Latih model
model_pipeline.fit(X_train, y_train)

# Evaluasi model
y_pred = model_pipeline.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Simpan model ke file untuk digunakan di Streamlit
joblib.dump(model_pipeline, 'logistic_model.pkl')
print("Model berhasil disimpan ke 'logistic_model.pkl'")

Accuracy: 0.7650816181689141
              precision    recall  f1-score   support

           0       0.81      0.89      0.85      1035
           1       0.58      0.43      0.49       374

    accuracy                           0.77      1409
   macro avg       0.69      0.66      0.67      1409
weighted avg       0.75      0.77      0.75      1409

Model berhasil disimpan ke 'logistic_model.pkl'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = numerical_imputer.fit_transform(X[[col]]).ravel()  # Gunakan .ravel() untuk membuat array 1D
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = numerical_imputer.fit_transform(X[[col]]).ravel()  # Gunakan .ravel() untuk membuat array 1D
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X