In [2]:
# Cell 1: Import libraries and load the data
import pandas as pd
import numpy as np

print("Loading the dataset...")
df = pd.read_csv('Churn.csv')
print("Dataset loaded successfully.")

# Cell 2: Clean the data
print("Starting data cleaning...")
# Parse 'TotalCharges' as numbers. Any entry that cannot be parsed (the
# notebook's author notes the column contains blank strings) becomes NaN
# because of errors='coerce'.
total_charges_numeric = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Replace the NaN entries with the median of the values that did parse
# (Series.median skips NaN by default), then store the result back on df.
total_charges_median = total_charges_numeric.median()
df['TotalCharges'] = total_charges_numeric.fillna(total_charges_median)
print("Data cleaning complete! The data is now ready.")

# Cell 3: Check your work
# Preview the first 5 rows, then print the column dtypes / non-null counts
# so the numeric conversion and the fill can be confirmed by eye.
print(df.head())
df.info()

Loading the dataset...
Dataset loaded successfully.
Starting data cleaning...
Data cleaning complete! The data is now ready.
   customerID  gender  SeniorCitizen Partner Dependents  tenure PhoneService  \
0  7590-VHVEG  Female              0     Yes         No       1           No   
1  5575-GNVDE    Male              0      No         No      34          Yes   
2  3668-QPYBK    Male              0      No         No       2          Yes   
3  7795-CFOCW    Male              0      No         No      45           No   
4  9237-HQITU  Female              0      No         No       2          Yes   

      MultipleLines InternetService OnlineSecurity  ... DeviceProtection  \
0  No phone service             DSL             No  ...               No   
1                No             DSL            Yes  ...              Yes   
2                No             DSL            Yes  ...               No   
3  No phone service             DSL            Yes  ...              Yes   
4             

In [3]:
# Cell 4: Connect to PostgreSQL and load the data
import os
from getpass import getpass
from urllib.parse import quote

from sqlalchemy import create_engine

# The password is never stored in the notebook. It is taken from the
# PGPASSWORD environment variable, or prompted for (without echo) when that
# variable is unset or empty.
# NOTE(review): an earlier revision of this cell hardcoded a real password
# here; treat that password as leaked and rotate it.
db_password = os.environ.get('PGPASSWORD') or getpass('PostgreSQL password: ')

# Connection details
db_user = 'postgres'
db_host = 'localhost'
db_port = '5432'
db_name = 'telecom_churn_db'

print("Connecting to the database...")
# Percent-encode the credentials so characters such as '@', ':', '/' or '%'
# inside them cannot be mistaken for URL delimiters. safe='' encodes every
# reserved character, including '/' and '+'.
encoded_user = quote(db_user, safe='')
encoded_password = quote(db_password, safe='')
connection_string = (
    f"postgresql+psycopg2://{encoded_user}:{encoded_password}"
    f"@{db_host}:{db_port}/{db_name}"
)
engine = create_engine(connection_string)

# Send the DataFrame 'df' to the 'customers' table in PostgreSQL.
# if_exists='replace' drops any existing 'customers' table first, so
# re-running this cell overwrites the previous upload instead of appending.
df.to_sql('customers', engine, if_exists='replace', index=False)

print("Success! Your data is now in the PostgreSQL database.")

Connecting to the database...
Success! Your data is now in the PostgreSQL database.
