In [None]:
# Add the src_telco_churn folder to the system path
# NOTE(review): the imports below are package-qualified
# (`src_telco_churn.<module>`), which Python resolves from the directory that
# CONTAINS the package, not from inside it. This entry therefore only helps if
# the package's own modules import each other by bare name -- TODO confirm,
# and consider adding the parent directory instead if not.
import sys
sys.path.append('./src_telco_churn')

# Now import the necessary modules
from src_telco_churn.data_loader import CSVDataLoader, DataPreparer  
from src_telco_churn.modeling import CrossValidator, TrainTestSplit, LogisticRegressionModel, HyperparameterTuner, ModelingPipeline
from src_telco_churn.preprocessor import HandleMissingValues,NormalizeData, EncodeCategoricalData, HandleOutliers,PreprocessingPipeline
from src_telco_churn.feature_engineering import (StatisticalFeatures, CategoricalEncoding, InteractionFeatures, TemporalFeatures, DerivedFeatures,FeaturePipeline)
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
import pandas as pd
import joblib  # Import joblib for saving the model

# Step 1: Load the data
# The loader is told which columns the dataset must contain; the preparer is
# given that loader and performs the actual load + validation call below.
required_columns = ['gender', 'tenure', 'MonthlyCharges', 'TotalCharges', 'Churn']  # Adjust to your dataset
csv_loader = CSVDataLoader(required_columns=required_columns)
preparer = DataPreparer(loaders=[csv_loader])

# Load and validate the dataset.
# NOTE(review): this is a machine-specific absolute path; point it at your own
# copy of the data (ideally a path relative to the repository) before running.
file_path = '/Users/tarangkadyan/Downloads/telco_churn_library/Data/data.csv'  # Provide the correct path to your data
data = preparer.load_and_validate(file_path, loader_type="csv")

# A bare `data.head()` in the middle of a cell is evaluated and thrown away
# (only the final expression of a cell is auto-displayed), so print the
# preview explicitly -- the same way the preprocessed data is previewed later.
print(data.head())

# Step 2: Preprocess the data
# Preprocessors handed to the pipeline, in this order:
#   - missing values filled using the 'mean' strategy
#   - numeric scaling using the 'minmax' method
#   - categorical columns encoded (one-hot, per the original author's note)
#   - outliers handled using the 'iqr' method
# NOTE(review): this runs on the full dataset before the train/test split in
# Step 4, so any statistics these steps derive from the data (means, min/max,
# IQR bounds) would also see the test rows -- confirm that is intended.
preprocessing_steps = [
    HandleMissingValues(strategy='mean'),
    NormalizeData(method='minmax'),
    EncodeCategoricalData(),
    HandleOutliers(method='iqr'),
]
pipeline = PreprocessingPipeline(preprocessors=preprocessing_steps)

# Run the pipeline over the loaded data and preview the first rows
processed_data = pipeline.apply(data)
print(processed_data.head())

# Step 3: Feature engineering
# Transformers handed to the feature pipeline. StatisticalFeatures is given
# no grouping column (group_by_column=None); the others use their defaults.
# Note that `pipeline` is rebound here: from this point on it refers to the
# feature pipeline, not the preprocessing pipeline from Step 2.
feature_transformers = [
    StatisticalFeatures(group_by_column=None),
    CategoricalEncoding(),
    InteractionFeatures(),
    TemporalFeatures(),
    DerivedFeatures(),
]
pipeline = FeaturePipeline(transformers=feature_transformers)

# Run the feature pipeline over the preprocessed data
df_transformed = pipeline.apply(processed_data)

# Step 4: Split into training and test sets
# Work from the feature-engineered frame; 'Churn' is the target column and
# everything else is treated as a feature (adjust the name if yours differs).
# NOTE(review): this assumes a column literally named 'Churn' still exists
# after the encoding steps above -- verify against the pipeline output.
features_data = df_transformed
X = features_data.drop(columns='Churn')
y = features_data['Churn']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: Train the model
# NOTE(review): `ModelTrainer` (and `ModelEvaluator` below) are not imported or
# defined anywhere in the visible notebook -- the import cell only brings in
# CrossValidator, TrainTestSplit, LogisticRegressionModel, HyperparameterTuner
# and ModelingPipeline from src_telco_churn.modeling. Unless these names are
# provided elsewhere, this cell will stop here with a NameError. TODO: import
# them from the module that defines them, or switch to the modeling classes
# that are already imported.
model_trainer = ModelTrainer()
# Fit on the training split only; `model` is reused by the save/predict cell.
model = model_trainer.train_model(X_train, y_train)

# Step 6: Evaluate the model
# The evaluator wraps the fitted model and is scored on the held-out test split.
model_evaluator = ModelEvaluator(model)
evaluation_results = model_evaluator.evaluate_model(X_test, y_test)
print(evaluation_results)


In [None]:
# Step 7: Persist the trained model to disk with joblib
# The file is written relative to the current working directory.
joblib.dump(value=model, filename="trained_model.pkl")
print("Model saved successfully as 'trained_model.pkl'")

# Step 8: Predict on the held-out test features and show the raw predictions
predictions = model.predict(X_test)
print(predictions)