In [32]:
import pandas as pd
import pymysql
import os
from dotenv import load_dotenv

In [37]:
load_dotenv()

True

In [36]:
# MYSQL Connection
conn = pymysql.connect(
    host='127.0.0.1', 
    port=3306, 
    user='root',
    password=os.getenv('MYSQLPW'),
    db='VideoStreaming',
)

cursor = conn.cursor(pymysql.cursors.DictCursor)

In [3]:
# Step 1: Create tables
create_statements = {
    'Channels_Data': """
        CREATE TABLE IF NOT EXISTS Channels_Data (
            ChannelID INT,
            Name VARCHAR(255),
            Description TEXT,
            OwnerID INT,
            Subscribers INT,
            CreationDate DATE
        );
    """,
    'Videos_Data': """
        CREATE TABLE IF NOT EXISTS Videos_Data (
            VideoID INT,
            Title VARCHAR(255),
            ChannelID INT,
            Category VARCHAR(100),
            Duration INT,
            Rating FLOAT,
            UploadDate DATE
        );
    """,
    'Subscriptions_Data': """
        CREATE TABLE IF NOT EXISTS Subscriptions_Data (
            TransactionID INT,
            ChannelID INT,
            UserID INT,
            SubscriptionDate DATE,
            Status VARCHAR(50),
            Amount INT
        );
    """,
    'Views_Data': """
        CREATE TABLE IF NOT EXISTS Views_Data (
            ViewID INT,
            VideoID INT,
            UserID INT,
            ViewDate DATE,
            DurationWatched INT,
            Feedback TEXT
        );
    """,
    'Users_Data': """
        CREATE TABLE IF NOT EXISTS Users_Data (
            UserID INT,
            FirstName VARCHAR(100),
            LastName VARCHAR(100),
            Country VARCHAR(100),
            BirthYear INT,
            Email VARCHAR(255)
        );
    """,
    'Owners_Data': """
        CREATE TABLE IF NOT EXISTS Owners_Data (
            OwnerID INT,
            FirstName VARCHAR(100),
            LastName VARCHAR(100),
            Country VARCHAR(100),
            Email VARCHAR(255),
            BirthYear INT
        );
    """
}

In [4]:
for table, statement in create_statements.items():
    cursor.execute(statement)

# Step 2: Load and insert data from CSVs
tables = {
    'sql_channel.csv': 'Channels_Data',
    'sql_video.csv': 'Videos_Data',
    'sql_subscription.csv': 'Subscriptions_Data',
    'sql_view.csv': 'Views_Data',
    'sql_user.csv': 'Users_Data',
    'sql_owner.csv': 'Owners_Data'
}

for filename, table in tables.items():
    df = pd.read_csv(filename)

    # Clean column names
    df.columns = [col.strip().replace(" ", "") for col in df.columns]

    # Convert date columns to proper format
    for col in df.columns:
        if "Date" in col:
            df[col] = pd.to_datetime(df[col], errors='coerce').dt.date

    # Prepare insert
    cols = ", ".join(df.columns)
    placeholders = ", ".join(["%s"] * len(df.columns))
    sql = f"INSERT INTO {table} ({cols}) VALUES ({placeholders})"

    for row in df.itertuples(index=False):
        cursor.execute(sql, tuple(row))

    print(f"Inserted {len(df)} rows into {table}")

# Finalize
conn.commit()
cursor.close()
conn.close()

Inserted 10 rows into Channels_Data
Inserted 60 rows into Videos_Data
Inserted 50 rows into Subscriptions_Data
Inserted 70 rows into Views_Data
Inserted 50 rows into Users_Data
Inserted 10 rows into Owners_Data


In [5]:
conn = pymysql.connect(
    host='127.0.0.1', 
    port=3306, 
    user='root',
    password='Wwewrestler1!1',
    db='VideoStreaming',
)

cursor = conn.cursor()

## Channel Popularity

In [6]:
# Channels with the highest total views across all videos

query = """
SELECT c.Name, SUM(vw.DurationWatched) AS TotalViews
FROM Channels_Data c
JOIN Videos_Data v ON c.ChannelID = v.ChannelID
JOIN Views_Data vw ON v.VideoID = vw.VideoID
GROUP BY c.ChannelID, c.Name
ORDER BY TotalViews DESC
LIMIT 5;
"""

cursor.execute(query)
results = cursor.fetchall()

for row in results:
    name, total = row
    print(name, int(total)) 


Epic Gaming Hub 408752
DIY Maker Lab 288368
Cinema Corner 278448
Fitness Pro Tips 247040
QuickTech Reviews 246488


In [7]:
# Channels with the most subscribers

query = """
SELECT c.Name, c.Subscribers
FROM Channels_Data c
ORDER BY c.Subscribers DESC
LIMIT 5;
"""

cursor.execute(query)
results = cursor.fetchall()

for row in results:
    name, subscribers = row
    print(name, int(subscribers))

Indie Music Lounge 152408
Indie Music Lounge 152408
Science Explained 145562
Science Explained 145562
HomeCooking Daily 111755


In [8]:
# Channels with the highest-rated videos on average

query = """
SELECT v.Title, ROUND(AVG(v.Rating), 2) AS AvgRating
FROM Videos_Data v
GROUP BY v.Title
ORDER BY AvgRating DESC
LIMIT 5;
"""

cursor.execute(query)
results = cursor.fetchall()

for row in results:
    title, avg_rating = row
    print(title, round(float(avg_rating), 2))

How to Bake Perfect Sourdough 4.98
5 Yoga Poses for Back Pain 4.96
Minecraft 1.21 Building Ideas 4.93
How to Refinish Old Furniture 4.89
Dune Part Two Spoiler Review 4.84


In [9]:
#Channels with the highest revenue (subscription amount)

query = """
SELECT c.Name, SUM(s.Amount) AS TotalRevenue
FROM Channels_Data c
JOIN Subscriptions_Data s ON c.ChannelID = s.ChannelID
GROUP BY c.ChannelID, c.Name
ORDER BY TotalRevenue DESC
LIMIT 5;
"""
cursor.execute(query)
results = cursor.fetchall()

for row in results:
    name, total = row
    print(name, int(total))


QuickTech Reviews 536
DIY Maker Lab 432
Fitness Pro Tips 392
World Travel Vlogs 364
Epic Gaming Hub 264


## Subscription Trends

In [10]:
# Most popular subscription status

query = """
SELECT s.Status, COUNT(*) AS Count
FROM Subscriptions_Data s
GROUP BY s.Status
ORDER BY Count DESC
"""
cursor.execute(query)
results = cursor.fetchall()

for row in results:
    status, count = row
    print(status, int(count))


active 70
cancelled 16
paused 14


In [15]:
# Monthly growth of subscriptions over time

query = """
SELECT c.Name, DATE_FORMAT(s.SubscriptionDate, '%Y-%m') AS Month, COUNT(*) AS Count
FROM Subscriptions_Data s
JOIN Channels_Data c ON s.ChannelID = c.ChannelID
GROUP BY Month, c.Name
ORDER BY Month DESC
LIMIT 5;
"""
cursor.execute(query)
results = cursor.fetchall()

for row in results:
    name, month, count = row
    print(name, month, int(count))

Cinema Corner 2025-05 4
QuickTech Reviews 2025-05 4
Science Explained 2025-05 4
World Travel Vlogs 2025-05 4
Cinema Corner 2025-04 4
