In [8]:
# Add the src_telco_churn folder to the system path
import sys
sys.path.append('./src_telco_churn')

# Now import the necessary modules
from src_telco_churn.data_loader import CSVDataLoader, DataPreparer  
from src_telco_churn.modeling import CrossValidator, TrainTestSplit, LogisticRegressionModel, HyperparameterTuner, ModelingPipeline
from src_telco_churn.preprocessor import HandleMissingValues, NormalizeData, EncodeCategoricalData, HandleOutliers,PreprocessingPipeline
from src_telco_churn.feature_engineering import (StatisticalFeatures, CategoricalEncoding, InteractionFeatures, TemporalFeatures, DerivedFeatures,FeaturePipeline)
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
import pandas as pd

# Step 1: Load the data
# Initialize the CSVDataLoader with required columns
required_columns = ['gender', 'tenure', 'MonthlyCharges', 'TotalCharges', 'Churn']  # Adjust to your dataset
csv_loader = CSVDataLoader(required_columns=required_columns)
preparer = DataPreparer(loaders=[csv_loader])
                        
# Load and validate the dataset
file_path = '/Users/tarangkadyan/Downloads/telco_churn_library/Data/data.csv'  # Provide the correct path to your data
data = preparer.load_and_validate(file_path, loader_type="csv")
data.head()

#Step-2:  Preprocessing data
# Initialize the preprocessing pipeline
pipeline = PreprocessingPipeline(preprocessors=[
    HandleMissingValues(strategy='mean'),  # Impute missing values with the mean
    NormalizeData(method='minmax'),         # Normalize data using MinMax scaling
    EncodeCategoricalData(),               # One-hot encode categorical variables
    HandleOutliers(method='iqr')           # Handle outliers using IQR method
])

# Apply preprocessing to the data
processed_data = pipeline.apply(data)

# Display the preprocessed data
print(processed_data.head())

Data loaded successfully from /Users/tarangkadyan/Downloads/telco_churn_library/Data/data.csv.
Data validation successful.
   SeniorCitizen    tenure  MonthlyCharges  customerID_0002-ORFBO  \
0            0.0  0.013889        0.115423                    0.0   
1            0.0  0.472222        0.385075                    0.0   
2            0.0  0.027778        0.354229                    0.0   
3            0.0  0.625000        0.239303                    0.0   
4            0.0  0.027778        0.521891                    0.0   

   customerID_0003-MKNFE  customerID_0004-TLHLJ  customerID_0011-IGKFF  \
0                    0.0                    0.0                    0.0   
1                    0.0                    0.0                    0.0   
2                    0.0                    0.0                    0.0   
3                    0.0                    0.0                    0.0   
4                    0.0                    0.0                    0.0   

   customerID_001

In [9]:
# Step 3: Feature Engineering
pipeline = FeaturePipeline(transformers=[
    StatisticalFeatures(group_by_column=None),
    CategoricalEncoding(),
    InteractionFeatures(),
    TemporalFeatures(),
    DerivedFeatures()
])

# Apply feature engineering to the processed data
df_transformed = pipeline.apply(processed_data)

# Step 4: Split the data into train and test sets
features_data = df_transformed  # Use the feature-engineered data
X = features_data.drop(columns='Churn')  # Adjust target column name as needed
y = features_data['Churn']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train the model
model_trainer = ModelTrainer()
model = model_trainer.train_model(X_train, y_train)

# Step 6: Evaluate the model
model_evaluator = ModelEvaluator(model)
evaluation_results = model_evaluator.evaluate_model(X_test, y_test)
print(evaluation_results)

# Step 7: Predict on the test set
predictions = model.predict(X_test)
print(predictions)


In [6]:
# Step 3: Feature Engineering
# Initialize the Feature Engineering Pipeline
# Example usage of the FeaturePipeline
pipeline = FeaturePipeline(transformers=[
    StatisticalFeatures(group_by_column='group'),
    CategoricalEncoding(),
    InteractionFeatures(),
    TemporalFeatures(),
    DerivedFeatures()
])

df_transformed = pipeline.apply(df)  # Apply the transformations to your dataframe


# Step 4: Split the data into train and test sets
X = features_data.drop(columns='target_column')  # Adjust 'target_column' as per your dataset
y = features_data['target_column']  # Adjust 'target_column' as per your dataset

# Split the data into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train the model
# Initialize the ModelTrainer
model_trainer = ModelTrainer()
model = model_trainer.train_model(X_train, y_train)  # Train the model with training data

# Step 6: Evaluate the model
# Initialize the ModelEvaluator
model_evaluator = ModelEvaluator(model)
evaluation_results = model_evaluator.evaluate_model(X_test, y_test)  # Evaluate on test data
print(evaluation_results)

# Step 7: Predict on the test set
predictions = model.predict(X_test)  # Get predictions (probabilities) on the test set
print(predictions)

# Step 8: Hyperparameter Tuning (optional)
# Initialize ModelTuning and perform hyperparameter tuning (if desired)

NameError: name 'FeatureEngineering' is not defined

In [None]:
# Step 4: Split the data into train and test sets
X = features_data.drop(columns='target_column')  # Adjust 'target_column' as per your dataset
y = features_data['target_column']  # Adjust 'target_column' as per your dataset

# Split the data into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train the model
# Initialize the ModelTrainer
model_trainer = ModelTrainer()
model = model_trainer.train_model(X_train, y_train)  # Train the model with training data

# Step 6: Evaluate the model
# Initialize the ModelEvaluator
model_evaluator = ModelEvaluator(model)
evaluation_results = model_evaluator.evaluate_model(X_test, y_test)  # Evaluate on test data
print(evaluation_results)

# Step 7: Predict on the test set
predictions = model.predict(X_test)  # Get predictions (probabilities) on the test set
print(predictions)

# Step 8: Hyperparameter Tuning (optional)
# Initialize ModelTuning and perform hyperparameter tuning (if desired)