<a href="https://colab.research.google.com/github/Manish-yadav-hub/flight-ticket-booking/blob/main/Untitled11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -----------------------------------------------------------
# CUSTOMER • REVENUE • CHURN INSIGHTS – SIMPLE PYTHON PROJECT
# -----------------------------------------------------------

import pandas as pd
import matplotlib.pyplot as plt
from google.colab import files

# -----------------------------------------------------------
# 1. UPLOAD DATASET
# -----------------------------------------------------------
print("Upload your CSV file...")
uploaded = files.upload()

# The script indexes the first key of `uploaded`; fail with a readable message
# instead of a bare IndexError when nothing was uploaded.
if not uploaded:
    raise ValueError("No file was uploaded - re-run this cell and choose a CSV file.")

file_name = next(iter(uploaded))   # Name of the first uploaded file
df = pd.read_csv(file_name)

print("Dataset Loaded Successfully!")
# A bare `df.head()` in the middle of a cell is evaluated and discarded;
# print it so the preview is actually shown.
print(df.head())

# -----------------------------------------------------------
# 2. BASIC CLEANING
# -----------------------------------------------------------
df = df.drop_duplicates()

# Convert date columns if present.
# This happens BEFORE missing values are filled: filling first would plant the
# integer 0 in date columns, which pd.to_datetime may then read as the
# 1970-01-01 epoch rather than as a missing date (NaT).
parsed_date_cols = []
for col in df.columns:
    if "date" not in col.lower():
        continue
    try:
        df[col] = pd.to_datetime(df[col], errors="coerce")
    except (TypeError, ValueError) as err:
        # Best effort: keep the raw column, but report it instead of hiding it.
        print(f"Warning: could not parse column '{col}' as dates ({err})")
    else:
        parsed_date_cols.append(col)

# Missing values become 0 in every column except successfully parsed date
# columns, where they stay NaT.
fill_values = {c: 0 for c in df.columns if c not in parsed_date_cols}
if fill_values:
    df = df.fillna(fill_values)

# -----------------------------------------------------------
# 3. FEATURE CREATION (Simple)
# -----------------------------------------------------------

# Churn Flag (if churn column exists)
if "churn" in df.columns:
    # Normalise to trimmed lower-case text before matching. "1.0" is accepted
    # because a numeric churn column stored as float stringifies 1 as "1.0",
    # which an exact match on "1" would silently count as not churned.
    churn_text = df["churn"].astype(str).str.strip().str.lower()
    df["churn_flag"] = churn_text.isin(["yes", "1", "1.0", "true"]).astype(int)

# Revenue Segment
if "revenue" in df.columns:
    def segment(val):
        """Return the revenue tier ("High", "Medium" or "Low") for one value.

        Tiers: >= 10000 is "High", >= 5000 is "Medium", anything else "Low".
        A value that cannot be read as a number (e.g. "N/A") is treated as 0,
        so a single bad cell no longer aborts the run with a ValueError.
        """
        try:
            amount = float(val)
        except (TypeError, ValueError):
            amount = 0.0
        if amount >= 10000:
            return "High"
        if amount >= 5000:
            return "Medium"
        return "Low"

    df["revenue_segment"] = df["revenue"].apply(segment)

# Active days (simple calculation if two date columns found)
# Uses the first two columns whose name contains "date", in column order:
# active_days = second column - first column, in whole days.
date_cols = [c for c in df.columns if "date" in c.lower()]
if len(date_cols) >= 2:
    start_col, end_col = date_cols[0], date_cols[1]
    try:
        df["active_days"] = (df[end_col] - df[start_col]).dt.days
    except (TypeError, AttributeError, ValueError) as err:
        # The subtraction or the .dt accessor fails when the columns are not
        # datetimes. Keep the original fallback of 0, but say why rather than
        # swallowing every exception (including KeyboardInterrupt).
        print(f"Warning: could not compute active_days from "
              f"'{start_col}' and '{end_col}' ({err}); using 0")
        df["active_days"] = 0

# -----------------------------------------------------------
# 4. KPIs
# -----------------------------------------------------------

print("\n---------------- KEY METRICS ----------------")

# Each metric is reported only when the column it depends on is present.
available_columns = set(df.columns)

if "revenue" in available_columns:
    total_revenue = df["revenue"].sum()
    print("Total Revenue:", total_revenue)

if "customer_id" in available_columns:
    distinct_customers = df["customer_id"].nunique()
    print("Total Customers:", distinct_customers)

if "churn_flag" in available_columns:
    # Mean of the 0/1 flag, expressed as a percentage with two decimals.
    churn_pct = round(df["churn_flag"].mean() * 100, 2)
    print("Churn Rate:", churn_pct, "%")

print("------------------------------------------------")

# -----------------------------------------------------------
# 5. VISUALIZATIONS
# -----------------------------------------------------------

# Revenue by City: one bar per city, bar height = summed revenue.
if {"city", "revenue"}.issubset(df.columns):
    revenue_per_city = df.groupby("city")["revenue"].sum()
    city_ax = revenue_per_city.plot.bar(figsize=(8, 4))
    city_ax.set_title("Revenue by City")
    city_ax.set_ylabel("Revenue")
    plt.show()

# Churn Pie Chart
if "churn_flag" in df.columns:
    # value_counts() orders rows by frequency, not by flag value. A fixed
    # labels list would therefore swap "Active"/"Churned" whenever churned
    # rows are the majority, and would not match the number of wedges when
    # only one class is present. Naming each count by its own flag value
    # keeps every wedge correctly labelled.
    churn_counts = df["churn_flag"].value_counts().rename(
        index={0: "Active", 1: "Churned"}
    )
    if not churn_counts.empty:  # nothing to draw for an empty dataset
        churn_counts.plot(
            kind="pie",
            autopct="%1.1f%%",
            figsize=(5, 5),
        )
        plt.title("Churn Distribution")
        plt.show()

# Revenue Segment Bar: number of rows falling in each revenue segment.
if "revenue_segment" in df.columns:
    segment_counts = df["revenue_segment"].value_counts()
    segment_ax = segment_counts.plot.bar(figsize=(5, 4))
    segment_ax.set_title("Customer Revenue Segments")
    segment_ax.set_xlabel("Segment")
    segment_ax.set_ylabel("Count")
    plt.show()

print("\n✔ Project Execution Completed Successfully!")


Upload your CSV file...
