In [None]:
import os 
import pandas as pd

In [None]:
# Relative location of the dataset: one directory above the current working
# directory, inside the "Data" folder.
file_path = os.path.join(
    os.pardir,
    "Data",
    "telco_churn_to_predict.csv",
)

In [None]:
# Load the dataset from `file_path` (built in an earlier cell) rather than a
# machine-specific absolute path, so the notebook runs wherever the relative
# project layout is preserved and the error message below names the path that
# was actually tried.
try:
    df = pd.read_csv(file_path, encoding='utf-8')
    print("Data Loaded Succesfully")
except FileNotFoundError:
    # Best-effort load: report the problem and leave `df` as None so that
    # later cells can check for a failed load.
    print(f"Error: The file was not found at {file_path}.")
    print("Please make sure 'telco_churn_to_predict.csv' is in the correct 'data' folder.")
    df = None

In [None]:
# Quick look at the loaded data. Skipped when the load cell set `df` to None.
if df is not None:
    # Header line followed by the first five rows.
    print("\nFirst 5 Rows of the Data Set", df.head(), sep="\n")

    # Column names, dtypes and non-null counts (info() prints directly).
    print("\n Data Information")
    df.info()

In [None]:
# Replace missing entries in selected columns with an explicit placeholder
# category, so they become a regular value instead of NaN.
placeholder_by_column = {
    'offer': 'None',
    'internet_type': 'No Internet',
}
for column_name, placeholder in placeholder_by_column.items():
    df[column_name] = df[column_name].fillna(placeholder)

# Show the non-null counts again to confirm the fill.
print("Non-Null After Handling Missing Values")
df.info()

In [None]:
#df['total_charges'].unique()

In [None]:
# Convert 'total_charges' to a numeric dtype. errors='coerce' turns every value
# that cannot be parsed as a number into NaN instead of raising.
rows_before_drop = len(df)
df['total_charges'] = pd.to_numeric(df['total_charges'], errors='coerce')

# Drop the rows whose 'total_charges' is NaN (either missing originally or
# coerced above). Rebinding `df` rather than using inplace=True keeps the step
# explicit about producing a new frame.
df = df.dropna(subset=['total_charges'])

print("DataFrame information after converting 'total_charges':")
df.info()

print(f"\nNumber of rows after dropping missing values: {len(df)}")
# Make the data loss visible instead of discarding rows silently.
print(f"Rows dropped because 'total_charges' was missing or non-numeric: {rows_before_drop - len(df)}")

In [None]:
# Every object-dtype column is treated as categorical and one-hot encoded.
categorical_cols = df.select_dtypes(include='object').columns
print("Categorical columns to be encoded:", categorical_cols, sep="\n")

# drop_first=True omits the first level of each encoded column; dtype=int
# makes the indicator columns integers (0/1) rather than booleans.
df = pd.get_dummies(
    data=df,
    columns=categorical_cols,
    drop_first=True,
    dtype=int,
)
print(f"\nDataFrame shape after one-hot encoding: {df.shape}")

print("\nFirst 5 rows of the dataset after one-hot encoding:", df.head(), sep="\n")

In [None]:
# Import the visualization library matplotlib.
import matplotlib.pyplot as plt

# Count each value of the encoded target column.
# Note: The original 'churn_label' column was one-hot encoded and is now 'churn_label_Yes'.
# value_counts() orders by frequency by default; sorting by label keeps the
# rows (and the bars below) in a fixed 0, 1 order regardless of which class is
# more common.
churn_counts = df['churn_label_Yes'].value_counts().sort_index()

# Print the counts.
print("Count of Churn Labels:")
print("0 = No Churn")
print("1 = Churn")
print(churn_counts)

# Share of each label as a percentage, in the same label order as the counts.
churn_percentages = df['churn_label_Yes'].value_counts(normalize=True).sort_index() * 100

# Print the percentages, rounded to 2 decimal places.
print("\nPercentage of Churn Labels:")
print(churn_percentages.round(2))

# Choose each bar's colour from its label instead of its position, so a label
# always gets the same colour even if only one class is present.
label_colors = {0: 'skyblue', 1: 'salmon'}
bar_colors = [label_colors.get(label, 'gray') for label in churn_counts.index]

# Bar chart of the distribution, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(6, 4))
churn_counts.plot(kind='bar', color=bar_colors, rot=0, ax=ax)
ax.set_title('Distribution of Churn Labels')
ax.set_xlabel('Churn Label (0 = No, 1 = Yes)')
ax.set_ylabel('Number of Customers')
plt.show()

In [None]:
def _plot_distribution(values, axis_label, hist_title, box_title, color):
    """Draw a histogram and a box plot of one numeric column side by side.

    Parameters
    ----------
    values : pandas.Series
        Numeric values to plot. Missing values are dropped first.
    axis_label : str
        Label for the value axis (x on the histogram, y on the box plot).
    hist_title : str
        Title of the histogram subplot.
    box_title : str
        Title of the box-plot subplot.
    color : str
        Fill colour of the histogram bars.

    Returns
    -------
    tuple
        The ``(fig, axes)`` pair created by ``plt.subplots``.
    """
    # Drop NaNs up front so missing values cannot break the histogram binning
    # or leave the box plot empty.
    values = values.dropna()

    # One row, two columns: histogram on the left, box plot on the right.
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    axes[0].hist(values, bins=30, color=color, edgecolor='black')
    axes[0].set_title(hist_title)
    axes[0].set_xlabel(axis_label)
    axes[0].set_ylabel('Number of Customers')

    axes[1].boxplot(values)
    axes[1].set_title(box_title)
    axes[1].set_ylabel(axis_label)

    fig.tight_layout()
    plt.show()
    return fig, axes


# Monthly charges: histogram + box plot.
fig, axes = _plot_distribution(
    df['monthly_charge'],
    axis_label='Monthly Charge ($)',
    hist_title='Distribution of Monthly Charges',
    box_title='Monthly Charges Box Plot',
    color='skyblue',
)

# Tenure in months: histogram + box plot in a new figure.
fig, axes = _plot_distribution(
    df['tenure_in_months'],
    axis_label='Tenure (Months)',
    hist_title='Distribution of Tenure in Months',
    box_title='Tenure in Months Box Plot',
    color='lightgreen',
)
