In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")
%matplotlib inline

df = pd.read_csv("netflix_customer_churn.csv")


In [None]:
# First rows of the loaded frame, rendered as plain text.
print(df.head())

# Number of null entries in every column, printed under a header line.
missing_per_column = df.isnull().sum()
print("\nMissing values:\n", missing_per_column)

# Distinct values of each categorical column; every line is preceded by a
# blank line because the label itself starts with a newline.
for col in (
    'gender',
    'subscription_type',
    'region',
    'device',
    'payment_method',
    'favorite_genre',
):
    distinct_values = df[col].unique()
    print(f"\nUnique values in {col}:", distinct_values)


In [None]:
# 2x2 overview grid: three histograms (with KDE overlay) plus the count of
# rows per `churned` value in the bottom-right panel.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# (column, grid position, panel title) for each histogram panel
histogram_panels = [
    ('age', (0, 0), 'Age Distribution'),
    ('watch_hours', (0, 1), 'Watch Hours Distribution'),
    ('monthly_fee', (1, 0), 'Monthly Fee Distribution'),
]
for column, position, panel_title in histogram_panels:
    panel = axes[position]
    sns.histplot(df[column], kde=True, ax=panel)
    panel.set_title(panel_title)

churn_panel = axes[1, 1]
sns.countplot(x='churned', data=df, ax=churn_panel)
churn_panel.set_title('Churn Count')

# Adjust subplot spacing before rendering the grid.
plt.tight_layout()
plt.show()

# Categorical columns that each get their own count plot.
categorical_cols = [
    'subscription_type',
    'gender',
    'region',
    'device',
    'payment_method',
    'favorite_genre',
]

for col in categorical_cols:
    # Bar order follows value_counts(), which sorts by frequency
    # (most common category first by pandas' default).
    by_frequency = df[col].value_counts().index

    plt.figure(figsize=(8, 4))
    sns.countplot(x=col, data=df, order=by_frequency)
    plt.title(f'{col} Count')
    plt.xticks(rotation=45)
    plt.show()


In [None]:
def plot_group_mean(data, group_col, value_col, title, figsize=(8, 4), rotate_labels=False):
    """Draw one bar chart of ``value_col`` aggregated per ``group_col`` category.

    The aggregation is left to ``sns.barplot`` (its default estimator is the
    mean), so the bars carry seaborn's default error bars as well.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding both columns.
    group_col : str
        Column whose categories go on the x axis.
    value_col : str
        Column aggregated on the y axis.
    title : str
        Figure title.
    figsize : tuple, default (8, 4)
        Figure size passed to ``plt.subplots``.
    rotate_labels : bool, default False
        Rotate the x tick labels by 45 degrees.
    """
    fig, ax = plt.subplots(figsize=figsize)
    sns.barplot(x=group_col, y=value_col, data=data, ax=ax)
    ax.set_title(title)
    if rotate_labels:
        ax.tick_params(axis='x', labelrotation=45)
    plt.show()


# One row per chart: (group_col, value_col, title, figsize, rotate_labels).
# Replaces seven copy-pasted plotting stanzas; sizes, titles and label
# rotation are kept exactly as they were for each individual chart.
GROUP_MEAN_PLOTS = [
    ('subscription_type', 'watch_hours', 'Avg Watch Hours by Subscription Type', (8, 4), False),
    ('subscription_type', 'monthly_fee', 'Avg Monthly Fee by Subscription Type', (8, 4), False),
    ('region', 'watch_hours', 'Avg Watch Hours by Region', (10, 4), True),
    ('region', 'monthly_fee', 'Avg Monthly Fee by Region', (10, 4), True),
    ('device', 'watch_hours', 'Avg Watch Hours by Device', (8, 4), False),
    ('device', 'monthly_fee', 'Avg Monthly Fee by Device', (8, 4), False),
    ('favorite_genre', 'avg_watch_time_per_day', 'Avg Watch Time per Day by Favorite Genre', (10, 4), True),
]

for group_col, value_col, plot_title, plot_size, rotate in GROUP_MEAN_PLOTS:
    plot_group_mean(df, group_col, value_col, plot_title, figsize=plot_size, rotate_labels=rotate)

# One chart per category column showing the mean of `churned` in each group.
# NOTE(review): reading that mean as a "rate" assumes `churned` is encoded
# as 0/1 or boolean — confirm against the dataset.
for col in ('gender', 'region', 'subscription_type', 'payment_method'):
    churned_by_group = df.groupby(col)['churned']
    # reset_index() turns the grouped Series back into a two-column frame
    # (category, mean) that barplot can address by column name.
    churn_rate = churned_by_group.mean().reset_index()

    plt.figure(figsize=(8, 4))
    sns.barplot(x=col, y='churned', data=churn_rate)
    plt.title(f'Churn Rate by {col}')
    plt.xticks(rotation=45)
    plt.show()


In [None]:
# Columns that enter the correlation matrix.
# NOTE(review): `churned` is included, so it is presumably numeric/boolean —
# confirm against the dataset.
numerical_cols = [
    'age',
    'watch_hours',
    'last_login_days',
    'monthly_fee',
    'number_of_profiles',
    'avg_watch_time_per_day',
    'churned',
]

# Pairwise correlations between the selected columns (DataFrame.corr defaults).
corr = df[numerical_cols].corr()

# Annotated heatmap of the matrix on a diverging colour map.
plt.figure(figsize=(10, 8))
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()


In [None]:
# Closing notes. These lines are fixed text: nothing here is computed from
# `df`, so they do not change when the data or the plots above change.
print(
    "\n--- Sample Insights ---",
    "1. Churn rate higher among certain payment methods or subscription types.",
    "2. Lower watch_hours associated with higher churn.",
    "3. Premium plan users tend to pay more, but may churn differently.",
    "4. Region differences exist in watch_hours and churn.",
    "5. Favorite genre preferences show differences in avg watch time per day.",
    sep="\n",
)
