# 1. Importing the Libraries

In [1]:
# importing packages

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import warnings 
warnings.filterwarnings('ignore')

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


# 2. Data Extraction

In [2]:
# loading the data 

amazon_df = pd.read_csv("amazon_prime_titles.csv")
disney_df = pd.read_csv("disney_plus_titles.csv")
netflix_df = pd.read_csv("netflix_titles.csv")


# 3. Exploratory Data Analysis

## 3.1 Shape

In [3]:
amazon_df.shape

(9668, 12)

In [4]:
disney_df.shape

(1450, 12)

In [5]:
netflix_df.shape

(8807, 12)

## 3.2 Data Types

In [6]:
amazon_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9668 entries, 0 to 9667
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       9668 non-null   object
 1   type          9668 non-null   object
 2   title         9668 non-null   object
 3   director      7585 non-null   object
 4   cast          8435 non-null   object
 5   country       672 non-null    object
 6   date_added    155 non-null    object
 7   release_year  9668 non-null   int64 
 8   rating        9331 non-null   object
 9   duration      9668 non-null   object
 10  listed_in     9668 non-null   object
 11  description   9668 non-null   object
dtypes: int64(1), object(11)
memory usage: 906.5+ KB


In [7]:
disney_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1450 entries, 0 to 1449
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       1450 non-null   object
 1   type          1450 non-null   object
 2   title         1450 non-null   object
 3   director      977 non-null    object
 4   cast          1260 non-null   object
 5   country       1231 non-null   object
 6   date_added    1447 non-null   object
 7   release_year  1450 non-null   int64 
 8   rating        1447 non-null   object
 9   duration      1450 non-null   object
 10  listed_in     1450 non-null   object
 11  description   1450 non-null   object
dtypes: int64(1), object(11)
memory usage: 136.1+ KB


In [8]:
netflix_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(1), object(11)
memory usage: 825.8+ KB


## 3.3 Checking missing values

In [9]:
amazon_df.isnull().sum()

show_id            0
type               0
title              0
director        2083
cast            1233
country         8996
date_added      9513
release_year       0
rating           337
duration           0
listed_in          0
description        0
dtype: int64

In [10]:
disney_df.isnull().sum()

show_id           0
type              0
title             0
director        473
cast            190
country         219
date_added        3
release_year      0
rating            3
duration          0
listed_in         0
description       0
dtype: int64

In [11]:
netflix_df.isnull().sum()

show_id            0
type               0
title              0
director        2634
cast             825
country          831
date_added        10
release_year       0
rating             4
duration           3
listed_in          0
description        0
dtype: int64

## 3.4 Checking duplicate values

In [12]:
amazon_df.duplicated().sum()

0

In [13]:
disney_df.duplicated().sum()

0

In [14]:
netflix_df.duplicated().sum()

0

# 4. Data Transformation

## 4.1 Data Cleaning

### 4.1.1 Amazon

Country and date_added columns have more than 80% of the missing values and hence dropping them.

In [15]:
# Dropping columns 

amazon_df = amazon_df.drop(['country', 'date_added'], axis=1)

# Removing rows with null values in 'rating' column

amazon_df.dropna(subset=['rating'], inplace=True)

# Dropping irrelevant columns for analysis

amazon_df = amazon_df.drop(['show_id','director','cast'], axis=1)

### 4.1.2 Disney_Plus

In [16]:
# Dropping columns 

disney_df = disney_df.drop(['country', 'date_added'], axis=1)

# Removing rows with null values in 'rating' column

disney_df.dropna(subset=['rating'], inplace=True)

# Dropping irrelevant columns for analysis

disney_df = disney_df.drop(['show_id','director','cast'], axis=1)

### 4.1.3 Netflix

In [17]:
# Dropping columns 

netflix_df = netflix_df.drop(['country', 'date_added'], axis=1)

# Removing rows with null values in 'rating' column

netflix_df.dropna(subset=['rating', 'duration'], inplace=True)

# Dropping irrelevant columns for analysis

netflix_df = netflix_df.drop(['show_id','director','cast'], axis=1)

## 4.2 Adding new columns

In [18]:
amazon_df['platform'] = 'Amazon Prime'
amazon_df['headquarters'] = 'Seattle, Washington'
amazon_df['date_founded'] = '02/02/2005'

disney_df['platform'] = 'Disney Plus'
disney_df['headquarters'] = 'Los Angeles, California'
disney_df['date_founded'] = '11/12/2019'

netflix_df['platform'] = 'Netflix'
netflix_df['headquarters'] = 'Los Gatos, California'
netflix_df['date_founded'] = '08/29/1997'

## 4.3 Merging the data frames

Merging all the three data frames to a single dataframe for analysis.

In [19]:
streaming_platforms_df = pd.concat([amazon_df,disney_df,netflix_df], ignore_index=True)

streaming_platforms_df.head()

Unnamed: 0,type,title,release_year,rating,duration,listed_in,description,platform,headquarters,date_founded
0,Movie,Take Care Good Night,2018,13+,110 min,"Drama, International",A Metro Family decides to fight a Cyber Crimin...,Amazon Prime,"Seattle, Washington",02/02/2005
1,Movie,Zoombies,2016,13+,87 min,"Horror, Science Fiction",When a strange virus quickly spreads through a...,Amazon Prime,"Seattle, Washington",02/02/2005
2,TV Show,Zoo Babies,2008,ALL,1 Season,"Kids, Special Interest",A heart warming and inspiring series that welc...,Amazon Prime,"Seattle, Washington",02/02/2005
3,TV Show,Zoë Coombs Marr: Bossy Bottom,2020,18+,1 Season,"Comedy, Talk Show and Variety",Zoë Coombs Marr has been on hiatus. Sort of. F...,Amazon Prime,"Seattle, Washington",02/02/2005
4,Movie,Zoe,2018,R,104 min,Science Fiction,ZOE tells a tale of forbidden love between an ...,Amazon Prime,"Seattle, Washington",02/02/2005


In [20]:
streaming_platforms_df.shape

(19578, 10)

## 4.4 Renaming the Column

In [21]:
streaming_platforms_df.rename(columns={'listed_in':'genre'}, inplace=True)

## 4.5 Formatting the data types

Convert the string data type of 'date_founded' to datetime.

In [22]:
streaming_platforms_df['date_founded'] = pd.to_datetime(streaming_platforms_df['date_founded'])

## 4.6 Replacing the column values

The dataset contains several redundant rating categories. For example, rating category 16, AGES_16_, 16+ represent the same category 16+. For analysis purpose, we replace the rating category values to a unified value.

In [23]:
streaming_platforms_df['rating'].value_counts()

rating
TV-MA       3284
TV-14       2447
13+         2117
R           1809
16+         1547
TV-PG       1333
ALL         1268
18+         1243
PG-13        949
PG           776
TV-G         619
TV-Y7        504
TV-Y         431
G            387
7+           385
NR           303
TV-NR        105
UNRATED       33
TV-Y7-FV      19
NC-17          6
AGES_18_       3
NOT_RATE       3
UR             3
AGES_16_       2
ALL_AGES       1
16             1
Name: count, dtype: int64

In [51]:
streaming_platforms_df['rating'].replace(['16','AGES_16_'], '16+', inplace=True)
streaming_platforms_df['rating'].replace(['AGES_18_'], '18+', inplace=True)
streaming_platforms_df['rating'].replace(['ALL_AGES'], 'ALL', inplace=True)
streaming_platforms_df['rating'].replace(['TV-NR','UNRATED','NOT_RATE','UR'], 'NR', inplace=True)

In [135]:
streaming_platforms_df.head()

Unnamed: 0,type,title,release_year,rating,duration,genre,description,platform,headquarters,date_founded,genre_first_words
0,Movie,Take Care Good Night,2018,13+,110 min,Drama,A Metro Family decides to fight a Cyber Crimin...,Amazon Prime,"Seattle, Washington",2005-02-02,[Drama]
1,Movie,Zoombies,2016,13+,87 min,Horror,When a strange virus quickly spreads through a...,Amazon Prime,"Seattle, Washington",2005-02-02,[Horror]
2,TV Show,Zoo Babies,2008,ALL,1 Season,Kids,A heart warming and inspiring series that welc...,Amazon Prime,"Seattle, Washington",2005-02-02,[Kids]
3,TV Show,Zoë Coombs Marr: Bossy Bottom,2020,18+,1 Season,Comedy,Zoë Coombs Marr has been on hiatus. Sort of. F...,Amazon Prime,"Seattle, Washington",2005-02-02,[Comedy]
4,Movie,Zoe,2018,R,104 min,Science Fiction,ZOE tells a tale of forbidden love between an ...,Amazon Prime,"Seattle, Washington",2005-02-02,[Science]


### for genre, simplify

In [134]:
# Update the main DataFrame to include the first word of each genre
genre_first_words = []
for index, row in streaming_platforms_df.iterrows():
    row_genre_first_words = []
    for genre in row['genre'].split(', '):
        first_word = genre.split()[0]  # Extract the first word
        row_genre_first_words.append(first_word)
    genre_first_words.append(row_genre_first_words)

# Add a new column 'genre_first_words' to the DataFrame
streaming_platforms_df['genre_first_words'] = genre_first_words

# Commit the changes
connection.commit()

In [137]:
from collections import Counter

# List of values in the 'genre' column
genrelist = streaming_platforms_df['genre'].tolist()

# Count the occurrences of each genre
genre_counts = Counter(genrelist)

# Show the counts
for genre, count in genre_counts.items():
    print(f"{genre}: {count}")

Drama: 2179
Horror: 519
Kids: 370
Comedy: 1626
Science Fiction: 86
Action: 1615
Arts: 405
TV Shows: 278
Documentary: 898
Animation: 807
Anime: 44
Music Videos and Concerts: 102
Fitness: 83
Faith and Spirituality: 13
Special Interest: 181
Adventure: 68
Fantasy: 23
Suspense: 189
Unscripted: 29
Western: 101
Arthouse: 132
Sports: 20
Military and War: 5
International: 47
Romance: 125
Young Adult Audience: 3
Talk Show and Variety: 3
LGBTQ: 12
Historical: 5
Musical: 2
Biographical: 34
Action-Adventure: 452
Docuseries: 253
Animals & Nature: 173
Lifestyle: 1
Movies: 56
Coming of Age: 56
Concert Film: 7
Anthology: 11
Reality: 4
Family: 20
Crime: 3
Talk Show: 1
Series: 3
Buddy: 20
Game Show / Competition: 1
Variety: 1
Dance: 1
Music: 1
Documentaries: 829
International TV Shows: 774
Crime TV Shows: 399
TV Dramas: 67
Children & Family Movies: 605
Dramas: 1599
British TV Shows: 253
Comedies: 1210
TV Comedies: 120
Thrillers: 65
Horror Movies: 275
Kids' TV: 387
Action & Adventure: 859
Reality TV: 120


### 5. Data Loading

In [168]:
import mysql.connector

try:
    # Connect to MySQL
    connection = mysql.connector.connect(
        host="localhost",
        user="cfitzpatrick",
        passwd="your_password",
        database="Group_5"
    )

   
    cursor = connection.cursor()

    # Define SQL statements to create tables
    create_movie_table_query = """
    CREATE TABLE IF NOT EXISTS movie (
        id INT AUTO_INCREMENT PRIMARY KEY,
        title VARCHAR(255),
        release_year INT,
        rating VARCHAR(10),
        duration VARCHAR(20),
        description TEXT,
        platform VARCHAR(50),
        headquarters VARCHAR(100),
        date_founded DATE
    )
    """

    create_genre_table_query = """
    CREATE TABLE IF NOT EXISTS genre (
        id INT AUTO_INCREMENT PRIMARY KEY,
        genre_name VARCHAR(255) UNIQUE
    )
    """

    create_streaming_platform_table_query = """
    CREATE TABLE IF NOT EXISTS streaming_platform (
        id INT AUTO_INCREMENT PRIMARY KEY,
        platform_name VARCHAR(50) UNIQUE,
        headquarters VARCHAR(100),
        date_founded DATE
    )
    """

    create_tv_show_table_query = """
    CREATE TABLE IF NOT EXISTS tv_show (
        id INT AUTO_INCREMENT PRIMARY KEY,
        title VARCHAR(255),
        release_year INT,
        rating VARCHAR(10),
        duration VARCHAR(20),
        description TEXT,
        platform VARCHAR(50),
        headquarters VARCHAR(100),
        date_founded DATE
    )
    """

    # Execute the queries
    cursor.execute(create_movie_table_query)
    cursor.execute(create_genre_table_query)
    cursor.execute(create_streaming_platform_table_query)
    cursor.execute(create_tv_show_table_query)

    # Commit the transaction
    connection.commit()

    print("Tables created successfully!")

except mysql.connector.Error as error:
    print("Error:", error)


Tables created successfully!


In [27]:
# Create rating table
try:
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS rating (
        id INT AUTO_INCREMENT PRIMARY KEY,
        rating_value VARCHAR(10) NOT NULL
    )
    """)
    
    print("Table 'rating' created successfully!")

except mysql.connector.Error as error:
    print("Error:", error)

Table 'rating' created successfully!


In [38]:
#See the tables for Group_5 schema 
query = "SHOW TABLES"
cursor.execute(query)
tables = cursor.fetchall()

# Print the table names
for table in tables:
    print(table[0])

genre
movie
rating
streaming_platform
tv_show


### Load data into SQL database tables 

In [29]:
# Iterate over each row in the DataFrame
for index, row in streaming_platforms_df.iterrows():
    # Check if the platform already exists
    platform_query = "SELECT * FROM streaming_platform WHERE platform_name = %s"
    cursor.execute(platform_query, (row['platform'],))
    existing_platform = cursor.fetchone()

    if not existing_platform:
        # Insert into streaming_platform table
        streaming_platform_query = "INSERT INTO streaming_platform (platform_name, headquarters, date_founded) VALUES (%s, %s, %s)"
        streaming_platform_data = (row['platform'], row['headquarters'], str(row['date_founded']))
        cursor.execute(streaming_platform_query, streaming_platform_data)

    # Insert into movie or TV show table
    if row['type'] == 'Movie':
        movie_query = "INSERT INTO movie (title, release_year, rating, duration, description, platform, headquarters, date_founded) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
        movie_data = (row['title'], row['release_year'], row['rating'], row['duration'], row['description'], row['platform'], row['headquarters'], str(row['date_founded']))
        cursor.execute(movie_query, movie_data)
    elif row['type'] == 'TV Show':
        tv_show_query = "INSERT INTO tv_show (title, release_year, rating, duration, description, platform, headquarters, date_founded) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
        tv_show_data = (row['title'], row['release_year'], row['rating'], row['duration'], row['description'], row['platform'], row['headquarters'], str(row['date_founded']))
        cursor.execute(tv_show_query, tv_show_data)

# Commit the changes and close the connection
connection.commit()

In [None]:
#Load table for genre

In [145]:
# Extract unique genres from the DataFrame
unique_genres = set()
for genres in streaming_platforms_df['genre'].str.split(', '):
    unique_genres.update(genres)

# Insert unique genres into the genre table
cursor = connection.cursor()
for genre in unique_genres:
    try:
        # Check if the genre already exists in the table
        cursor.execute("SELECT id FROM genre WHERE genre_name = %s", (genre,))
        result = cursor.fetchone()
        if result is None:
            cursor.execute("INSERT INTO genre (genre_name) VALUES (%s)", (genre,))
            connection.commit()
    except mysql.connector.Error as error:
        print("Error:", error)
        

        # Execute a query to select all values from the genre_name column in the genre table
cursor = connection.cursor()
cursor.execute("SELECT genre_name FROM genre")

# Fetch all the results
genre_names = cursor.fetchall()

# Print the genre names
for genre_name in genre_names:
    print(genre_name[0])

Action
Action & Adventure
Action-Adventure
Adventure
and Culture
Animals & Nature
Animation
Anime
Anime Features
Anime Series
Anthology
Arthouse
Arts
Biographical
British TV Shows
Buddy
Children & Family Movies
Classic & Cult TV
Classic Movies
Comedies
Comedy
Coming of Age
Concert Film
Crime
Crime TV Shows
Cult Movies
Dance
Disaster
Documentaries
Documentary
Docuseries
Drama
Dramas
Entertainment
Faith & Spirituality
Faith and Spirituality
Family
Fantasy
Fitness
Game Show / Competition
Historical
Horror
Horror Movies
Independent Movies
International
International Movies
International TV Shows
Kids
Kids' TV
Korean TV Shows
LGBTQ
LGBTQ Movies
Lifestyle
Medical
Military and War
Movies
Music
Music & Musicals
Music Videos and Concerts
Musical
Mystery
Parody
Police/Cop
Reality
Reality TV
Romance
Romantic Comedy
Romantic Movies
Romantic TV Shows
Sci-Fi & Fantasy
Science & Nature TV
Science Fiction
Series
Soap Opera / Melodrama
Spanish-Language TV Shows
Special Interest
Sports
Sports Movies
Spy

In [37]:
# # Update the main DataFrame to include genre IDs
# genre_ids = []
# for index, row in streaming_platforms_df.iterrows():
#     row_genre_ids = []
#     for genre in row['genre'].split(', '):
#         try:
#             cursor.execute("SELECT id FROM genre WHERE genre_name = %s", (genre,))
#             genre_id = cursor.fetchone()[0]
#             row_genre_ids.append(genre_id)
#         except mysql.connector.Error as error:
#             print("Error:", error)
#     genre_ids.append(row_genre_ids)

# # Add a new column 'genre_ids' to the DataFrame
# streaming_platforms_df['genre_ids'] = genre_ids

# # Commit the changes
# connection.commit()


### Check our tables

In [132]:
try:
    # Connect to MySQL
    connection = mysql.connector.connect(
        host="localhost",
        user="cfitzpatrick",
        passwd="your_password",
        database="Group_5"
    )

    cursor = connection.cursor()

    # Define SQL queries to select data from each table
    select_movie_query = "SELECT * FROM movie LIMIT 5"
    select_genre_query = "SELECT * FROM genre LIMIT 5"
    select_streaming_platform_query = "SELECT * FROM streaming_platform LIMIT 5"
    select_tv_show_query = "SELECT * FROM tv_show LIMIT 5"

    # Execute the queries
    cursor.execute(select_movie_query)
    movie_data = cursor.fetchall()

    cursor.execute(select_genre_query)
    genre_data = cursor.fetchall()

    cursor.execute(select_streaming_platform_query)
    streaming_platform_data = cursor.fetchall()

    cursor.execute(select_tv_show_query)
    tv_show_data = cursor.fetchall()

    # Print the data
    print("Movie Table Data:")
    for row in movie_data:
        print(row)

    print("\nGenre Table Data:")
    for row in genre_data:
        print(row)

    print("\nStreaming Platform Table Data:")
    for row in streaming_platform_data:
        print(row)

    print("\nTV Show Table Data:")
    for row in tv_show_data:
        print(row)

except mysql.connector.Error as error:
    print("Error:", error)


        

Movie Table Data:
(1, 'Take Care Good Night', 2018, '13+', '110 min', 'A Metro Family decides to fight a Cyber Criminal threatening their stability and pride.', 'Amazon Prime', 'Seattle, Washington', datetime.date(2005, 2, 2))
(2, 'Take Care Good Night', 2018, '13+', '110 min', 'A Metro Family decides to fight a Cyber Criminal threatening their stability and pride.', 'Amazon Prime', 'Seattle, Washington', datetime.date(2005, 2, 2))
(3, 'Take Care Good Night', 2018, '13+', '110 min', 'A Metro Family decides to fight a Cyber Criminal threatening their stability and pride.', 'Amazon Prime', 'Seattle, Washington', datetime.date(2005, 2, 2))
(4, 'Zoombies', 2016, '13+', '87 min', 'When a strange virus quickly spreads through a safari park and turns all the zoo animals undead, those left in the park must stop the creatures before they escape and zombify the whole city.', 'Amazon Prime', 'Seattle, Washington', datetime.date(2005, 2, 2))
(5, 'Zoe', 2018, 'R', '104 min', 'ZOE tells a tale of fo

In [128]:
#pip install --upgrade bokeh

In [129]:
#pip install --upgrade panel

## Visualization Dashboard using logging

In [159]:
import mysql.connector

# Connect to MySQL
connection = mysql.connector.connect(
    host="localhost",
    user="cfitzpatrick",
    passwd="your_password",
    database="Group_5"
)

# Define SQL query to fetch data
query = "SELECT * FROM genre"

# Execute SQL query and fetch data into a DataFrame
import pandas as pd
try:
    streaming_platforms_df = pd.read_sql(query, connection)
    print("Data successfully fetched from the database.")
    print(streaming_platforms_df.head())  # Print the first few rows of the DataFrame
except Exception as e:
    print("Error fetching data:", e)

# Close the connection
connection.close()


Data successfully fetched from the database.
   id          genre_name
0   9              Action
1  40  Action & Adventure
2  74    Action-Adventure
3  58           Adventure
4  76         and Culture


In [179]:
import pandas as pd
import mysql.connector
import panel as pn
import holoviews as hv

# Connect to MySQL
connection = mysql.connector.connect(
    host="localhost",
    user="cfitzpatrick",
    passwd="your_password",
    database="Group_5"
)

# Define SQL query to fetch data
query = "SELECT * FROM genre"

# Execute SQL query and fetch data into a DataFrame
genre_df = pd.read_sql(query, connection)

# Close the connection
connection.close()

# Create a list of all unique genres
genres = genre_df['genre_name'].tolist()

# Create dropdown widget
genre_select = pn.widgets.Select(name='Select Genre', options=genres)

# Define a function to create a plot based on the selected genre
@pn.depends(genre_select.param.value)
def create_genre_plot(genre):
    # Retrieve data for the selected genre from the DataFrame or SQL query
    # Example: df = streaming_platforms_df[streaming_platforms_df['genre'] == genre]
    # Replace the above line with appropriate SQL query to fetch data from the database
    
    # For demonstration, let's create a random plot
    data = {'x': range(10), 'y': [i ** 2 for i in range(10)]}
    plot = hv.Points(data).opts(title=f'{genre} Plot')
    return plot

# Create a Panel dashboard
dashboard = pn.Column(
    '## Group Five Streaming Dashboard',
    '## Select Genre',
    genre_select,
    '## Genre Plot',
    create_genre_plot
)

# Show the dashboard
dashboard.show()


Launching server at http://localhost:59499


<panel.io.server.Server at 0x7fdb24242c10>

INFO:tornado.access:200 GET / (::1) 400.61ms
INFO:tornado.access:200 GET /static/extensions/panel/bundled/datatabulator/tabulator-tables@5.5.0/dist/css/tabulator_simple.min.css (::1) 1.76ms
INFO:tornado.access:200 GET /static/extensions/panel/bundled/datatabulator/luxon/build/global/luxon.min.js (::1) 8.80ms
INFO:tornado.access:200 GET /static/js/bokeh-gl.min.js?v=6721b980580fb8317f83927f85b92aea8fec9bcd94ff58f895f85e5617987ee82f89b5da322b9cdb482c46ac2bd3442466f7ce2484bae8b414318a7ace09e901 (::1) 14.17ms
INFO:tornado.access:200 GET /static/js/bokeh-widgets.min.js?v=31de1843fe9e438f1829e492effd36329ababde542605297701ad7d96d2fc86464984340ba7645878652d1dccf43f535033e4cc55a45008c5bba7c20ae4577b9 (::1) 15.97ms
INFO:tornado.access:200 GET /static/extensions/panel/bundled/datatabulator/tabulator-tables@5.5.0/dist/js/tabulator.min.js (::1) 15.09ms
INFO:tornado.access:200 GET /static/extensions/panel/panel.min.js?v=4dc754e770a2aaf1bd357f00e2c1ffb347b27f3b739a37791169825d30b0a501 (::1) 20.12ms
I

In [177]:
# import pandas as pd
# import mysql.connector
# import panel as pn
# import holoviews as hv

# # Connect to MySQL
# connection = mysql.connector.connect(
#     host="localhost",
#     user="cfitzpatrick",
#     passwd="your_password",
#     database="Group_5"
# )

# # Define SQL queries to fetch data
# movie_query = "SELECT * FROM movie"
# rating_query = "SELECT * FROM rating"
# platform_query = "SELECT * FROM streaming_platform"
# genre_query = "SELECT * FROM genre"

# # Execute SQL queries and fetch data into DataFrames
# movie_df = pd.read_sql(movie_query, connection)
# rating_df = pd.read_sql(rating_query, connection)
# platform_df = pd.read_sql(platform_query, connection)
# genre_df = pd.read_sql(genre_query, connection)

# # Close the connection
# connection.close()

# # Define a function to create the dashboard
# def create_dashboard():
#     # Create dropdown widgets
#     platform_select = pn.widgets.Select(name='Select Platform', options=platform_df['platform_name'].tolist())
#     genre_select = pn.widgets.Select(name='Select Genre', options=genre_df['genre_name'].tolist())
    
#     # Define functions to create plots based on the selected platform and genre
#     @pn.depends(platform_select.param.value)
#     def create_release_year_plot(platform):
#         # Retrieve data for the selected platform from the movie DataFrame
#         df = movie_df[movie_df['platform'] == platform]
#         plot = df['release_year'].value_counts().sort_index().hvplot(title=f'{platform} Release Year Plot')
#         return plot

#     @pn.depends(platform_select.param.value)
#     def create_rating_plot(platform):
#         # Retrieve data for the selected platform from the rating DataFrame
#         df = rating_df.merge(movie_df, how='inner', on='id')
#         df = df[df['platform'] == platform]
#         plot = df['rating_value'].value_counts().hvplot.bar(title=f'{platform} Rating Plot')
#         return plot

#     @pn.depends(genre_select.param.value)
#     def create_genre_plot(genre):
#         # Retrieve data for the selected genre from the movie DataFrame
#         df = movie_df[movie_df['genre'].str.contains(genre)]
#         top_genres = df['genre'].value_counts().nlargest(5)
#         plot = top_genres.hvplot.bar(title=f'Top Genres with {genre}')
#         return plot

#     # Create a Panel dashboard
#     dashboard = pn.Column(
#         '## Group Five Streaming Dashboard',
#         '## Select Platform',
#         platform_select,
#         '## Release Year Plot',
#         create_release_year_plot,
#         '## Rating Plot',
#         create_rating_plot,
#         '## Select Genre',
#         genre_select,
#         '## Genre Plot',
#         create_genre_plot
#     )
    
#     return dashboard

# # Show the dashboard
# create_dashboard().show()


In [176]:
import panel as pn
import hvplot.pandas
import logging

# Configure logging
logging.basicConfig(filename='dashboard.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


# Create a list of all unique platforms
platforms = streaming_platforms_df['platform'].unique().tolist()

# Create dropdown widget
platform_select = pn.widgets.Select(name='Select Platform', options=platforms)

# Define a function to create a plot based on the selected platform
@pn.depends(platform_select.param.value)
def create_release_year_plot(platform):
    try:
        df = streaming_platforms_df[streaming_platforms_df['platform'] == platform]
        return df['release_year'].value_counts().sort_index().hvplot()
    except Exception as e:
        logging.error(f"Error in create_release_year_plot: {e}")

@pn.depends(platform_select.param.value)
def create_rating_plot(platform):
    try:
        df = streaming_platforms_df[streaming_platforms_df['platform'] == platform]
        return df['rating'].value_counts().hvplot.bar()
    except Exception as e:
        logging.error(f"Error in create_rating_plot: {e}")

@pn.depends(platform_select.param.value)
def create_genre_plot(platform):
    try:
        df = streaming_platforms_df[streaming_platforms_df['platform'] == platform]
        top_genres = df['genre'].value_counts().nlargest(5)  # Get the top 10 genres
        return top_genres.hvplot.bar()
    except Exception as e:
        logging.error(f"Error in create_genre_plot: {e}")

# Create a Panel dashboard
dashboard = pn.Column(
    '## Select Platform',
    platform_select,
    '## Release Year Plot',
    create_release_year_plot,
    '## Rating Plot',
    create_rating_plot,
    '## Genre Plot',
    create_genre_plot
)

# Show the dashboard
dashboard.show()

KeyError: 'platform'

In [None]:
# import panel as pn
# import hvplot.pandas

# # Create interactive plots using hvPlot
# platform_plot = streaming_platforms_df['platform'].value_counts().hvplot.bar()
# release_year_plot = streaming_platforms_df['release_year'].hvplot.hist()

# # Create a Panel dashboard
# dashboard = pn.Column(
#     '# Streaming Platforms Dashboard',
#     '## Platform Distribution',
#     platform_plot,
#     '## Release Year Distribution',
#     release_year_plot,
# )

# # Show the dashboard
# dashboard.show()

In [166]:
import mysql.connector

try:
    # Connect to MySQL
    connection = mysql.connector.connect(
        host="localhost",
        user="cfitzpatrick",
        passwd="your_password",
        database="Group_5"
    )

    # Create a cursor object
    cursor = connection.cursor()

    # List of tables
    tables = ['genre', 'movie', 'rating', 'streaming_platform', 'tv_show']

    # Iterate over tables
    for table in tables:
        print(f"Table: {table}")
        # Execute the DESCRIBE command to see the structure of the table
        cursor.execute(f"DESCRIBE {table}")

        # Fetch all the rows
        rows = cursor.fetchall()

        # Print the column details
        for row in rows:
            print(row)

        print()  # Add a blank line between tables

    # Close the cursor and connection
    cursor.close()
    connection.close()

except mysql.connector.Error as error:
    print("Error while connecting to MySQL", error)


Table: genre
('id', 'int', 'NO', 'PRI', None, 'auto_increment')
('genre_name', 'varchar(255)', 'YES', 'UNI', None, '')

Table: movie
('id', 'int', 'NO', 'PRI', None, 'auto_increment')
('title', 'varchar(255)', 'YES', '', None, '')
('release_year', 'int', 'YES', '', None, '')
('rating', 'varchar(10)', 'YES', '', None, '')
('duration', 'varchar(20)', 'YES', '', None, '')
('description', 'text', 'YES', '', None, '')
('platform', 'varchar(50)', 'YES', '', None, '')
('headquarters', 'varchar(100)', 'YES', '', None, '')
('date_founded', 'date', 'YES', '', None, '')

Table: rating
('id', 'int', 'NO', 'PRI', None, 'auto_increment')
('rating_value', 'varchar(10)', 'NO', '', None, '')

Table: streaming_platform
('id', 'int', 'NO', 'PRI', None, 'auto_increment')
('platform_name', 'varchar(50)', 'YES', 'UNI', None, '')
('headquarters', 'varchar(100)', 'YES', '', None, '')
('date_founded', 'date', 'YES', '', None, '')

Table: tv_show
('id', 'int', 'NO', 'PRI', None, 'auto_increment')
('title', 'var

In [171]:
import mysql.connector
from mysql.connector import Error

try:
    # Establish a connection to MySQL
    connection = mysql.connector.connect(
        host="localhost",
        user="cfitzpatrick",
        passwd="your_password",
        database="Group_5"
    )

    # Check if the connection is successful
    if connection.is_connected():
        # Define the SQL queries to select data from each table
        query_genre = "SELECT * FROM genre LIMIT 5"
        query_movie = "SELECT * FROM movie LIMIT 5"
        query_rating = "SELECT * FROM rating LIMIT 5"
        query_streaming_platform = "SELECT * FROM streaming_platform LIMIT 5"
        query_tv_show = "SELECT * FROM tv_show LIMIT 5"

        # Execute the queries
        cursor = connection.cursor()

        cursor.execute(query_genre)
        result_genre = cursor.fetchall()

        cursor.execute(query_movie)
        result_movie = cursor.fetchall()

        cursor.execute(query_rating)
        result_rating = cursor.fetchall()

        cursor.execute(query_streaming_platform)
        result_streaming_platform = cursor.fetchall()

        cursor.execute(query_tv_show)
        result_tv_show = cursor.fetchall()

        # Print the results
        print("Genre:")
        for row in result_genre:
            print(row)

        print("\nMovie:")
        for row in result_movie:
            print(row)

        print("\nRating:")
        for row in result_rating:
            print(row)

        print("\nStreaming Platform:")
        for row in result_streaming_platform:
            print(row)

        print("\nTV Show:")
        for row in result_tv_show:
            print(row)

except Error as e:
    print("Error while connecting to MySQL", e)

finally:
    # Close the connection
    if 'connection' in locals():
        connection.close()



Genre:
(9, 'Action')
(40, 'Action & Adventure')
(74, 'Action-Adventure')
(58, 'Adventure')
(76, 'and Culture')

Movie:
(1, 'Take Care Good Night', 2018, '13+', '110 min', 'A Metro Family decides to fight a Cyber Criminal threatening their stability and pride.', 'Amazon Prime', 'Seattle, Washington', datetime.date(2005, 2, 2))
(2, 'Take Care Good Night', 2018, '13+', '110 min', 'A Metro Family decides to fight a Cyber Criminal threatening their stability and pride.', 'Amazon Prime', 'Seattle, Washington', datetime.date(2005, 2, 2))
(3, 'Take Care Good Night', 2018, '13+', '110 min', 'A Metro Family decides to fight a Cyber Criminal threatening their stability and pride.', 'Amazon Prime', 'Seattle, Washington', datetime.date(2005, 2, 2))
(4, 'Zoombies', 2016, '13+', '87 min', 'When a strange virus quickly spreads through a safari park and turns all the zoo animals undead, those left in the park must stop the creatures before they escape and zombify the whole city.', 'Amazon Prime', 'Seat