# Import Necessary Libraries

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# DataFrame of Movies and Actors

In [4]:
# Movie catalogue used by every chart in this notebook.
# Written row-wise (one tuple per film) so each movie's attributes can be
# read across a single line; the column order is given once at the bottom.
df = pd.DataFrame(
    [
        ('Leonardo Dicaprio', 'The Revenant',            2015, 'Adventure', 8.0,   533_000_000),
        ('Leonardo Dicaprio', 'Inception',               2010, 'Sci-Fi',    8.8,   829_000_000),
        ('Leonardo Dicaprio', 'The Wolf of Wall Street', 2013, 'Biography', 8.2,   392_000_000),
        ('Jake Gyllenhaal',   'Prisoners',               2013, 'Thriller',  8.1,   122_000_000),
        ('Jake Gyllenhaal',   'Zodiac',                  2007, 'Crime',     7.7,    84_000_000),
        ('Jake Gyllenhaal',   'Nightcrawler',            2014, 'Thriller',  7.8,    87_000_000),
        ('Christian Bale',    'American Psycho',         2000, 'Horror',    7.6,    34_000_000),
        ('Christian Bale',    'Ford vs Ferrari',         2019, 'Drama',     8.1,   117_000_000),
        ('Christian Bale',    'The Dark Knight',         2008, 'Action',    9.0, 1_005_000_000),
        ('Brad Pitt',         'Fight Club',              1999, 'Drama',     8.8,   101_000_000),
        ('Brad Pitt',         'SE7EN',                   1995, 'Thriller',  8.6,   327_000_000),
        ('Brad Pitt',         'F1',                      2009, 'Action',    7.1,    25_000_000),
    ],
    columns=['Actor', 'Movie', 'Year', 'Genre', 'Rating', 'BoxOffice'],
)
# Bare last expression so the notebook renders the frame as a rich table.
df

Unnamed: 0,Actor,Movie,Year,Genre,Rating,BoxOffice
0,Leonardo Dicaprio,The Revenant,2015,Adventure,8.0,533000000
1,Leonardo Dicaprio,Inception,2010,Sci-Fi,8.8,829000000
2,Leonardo Dicaprio,The Wolf of Wall Street,2013,Biography,8.2,392000000
3,Jake Gyllenhaal,Prisoners,2013,Thriller,8.1,122000000
4,Jake Gyllenhaal,Zodiac,2007,Crime,7.7,84000000
5,Jake Gyllenhaal,Nightcrawler,2014,Thriller,7.8,87000000
6,Christian Bale,American Psycho,2000,Horror,7.6,34000000
7,Christian Bale,Ford vs Ferrari,2019,Drama,8.1,117000000
8,Christian Bale,The Dark Knight,2008,Action,9.0,1005000000
9,Brad Pitt,Fight Club,1999,Drama,8.8,101000000


## MODULE 1: Plotly Basics
## What is Plotly?

### Plotly is a Python library to make interactive charts.

### “Interactive” = you can hover, zoom, filter, click on data.

### Hover = shows extra info when you move your mouse over a point.

## Key concepts:

### px = Plotly Express → simple, high-level API, easiest to use.

### go = Graph Objects → detailed, highly customizable.

# Scatter Plot

### Use when: You want to see the relationship between two numeric variables.

In [6]:
# Scatter Plot: Rating vs BoxOffice
# One marker per movie: colour encodes the genre, marker area encodes the
# box-office takings, and the hover box additionally lists movie and actor.
fig = px.scatter(
    data_frame=df,
    x='Rating',
    y='BoxOffice',
    color='Genre',
    size='BoxOffice',
    hover_data=['Movie', 'Actor'],
    title='Rating vs BoxOffice by Genre',
)
fig.show()

# Bar Chart

### Use when: You want to compare categorical data, e.g., movies per actor.

In [7]:
# Count of movies per actor.
# Aggregate to ONE row per actor before plotting: px.bar stacks every row that
# shares an x value, so feeding it a per-movie count (transform('count')) would
# draw each bar at count * count (9) instead of the real count (3).
# sort=False keeps the actors in their order of first appearance in df.
movies_per_actor = (
    df.groupby('Actor', sort=False)['Movie']
      .count()
      .reset_index(name='Movies')
)
fig = px.bar(movies_per_actor, x='Actor', y='Movies',
             color='Actor', title='Number of Movies per Actor')
fig.show()

# Box Plot

### Use when: You want to see distribution of a numeric variable.

### points='all' → shows all individual points over boxplot.

### Boxplots show median, quartiles, outliers.

In [8]:
# One box per genre; points='all' draws every individual rating next to its box.
fig = px.box(
    data_frame=df,
    x='Genre',
    y='Rating',
    color='Genre',
    points='all',
    title='Rating Distribution by Genre',
)
fig.show()

# Histogram

### Use when: You want to see frequency of numeric values, e.g., how many movies have certain ratings.

In [9]:
# Frequency of ratings; each bar is split by genre via the colour encoding.
fig = px.histogram(
    data_frame=df,
    x='Rating',
    color='Genre',
    nbins=5,
    title='Distribution of Ratings',
)
fig.show()

## Step-by-Step Explanation
### 1. Bins

### Ratings range: 7.1 – 9.0

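### nbins=5 → Asks Plotly for at most 5 bins; Plotly then picks round bin edges itself, so the ranges below are only an approximate illustration of how the ratings get grouped: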

| Bin # | Range (approx) |
| ----- | -------------- |
| 1     | 7.1 – 7.46     |
| 2     | 7.46 – 7.82    |
| 3     | 7.82 – 8.18    |
| 4     | 8.18 – 8.54    |
| 5     | 8.54 – 9.0     |
### 2. Count of movies in each bin

| Bin   | Ratings in Bin     | Count | Genres in Bin                   |
| ----- | ------------------ | ----- | ------------------------------- |
| 1     | 7.1                | 1     | Action (F1)                     |
| 2     | 7.6, 7.7, 7.8      | 3     | Horror, Crime, Thriller         |
| 3     | 8.0, 8.1, 8.1      | 3     | Adventure, Thriller, Drama      |
| 4     | 8.2                | 1     | Biography                       |
| 5     | 8.6, 8.8, 8.8, 9.0 | 4     | Thriller, Sci-Fi, Drama, Action |


# Pie Chart

### Use when: You want to show part-to-whole relationships, e.g., genre share.

In [10]:
# No `values` column is given, so each slice reflects how many rows carry that genre.
fig = px.pie(
    data_frame=df,
    names='Genre',
    title='Movie Genre Distribution',
)
fig.show()

# Line Chart

### Use when: You want to see trend over time, e.g., BoxOffice by Year.

In [11]:
# px.line joins points in the order the rows appear, and df is not ordered by
# Year, so plotting it directly makes each actor's line double back in time.
# Sort chronologically first so every line runs left to right.
# category_orders pins the actors to their original order of appearance, which
# keeps the legend order and colour assignment the same as before the sort.
fig = px.line(df.sort_values('Year'), x='Year', y='BoxOffice', color='Actor', markers=True,
              hover_data=['Movie'], title='BoxOffice Trend Over Years',
              category_orders={'Actor': df['Actor'].unique().tolist()})
fig.show()

# MODULE 2: Plotly Graph Objects (Advanced Customization)

## go.Figure() allows fine-grained control.

## Example: combining a line + marker trace with a bar trace on two y-axes.

In [15]:
# `go` (plotly.graph_objects) is imported in the import cell at the top of the
# notebook, together with the other libraries.
fig = go.Figure()

# Rating as a line with markers. It is assigned to the overlaying axis (y2) so
# it is drawn ABOVE the bars; with the bars on the overlaying axis instead,
# tall bars could cover the rating markers.
fig.add_trace(go.Scatter(x=df['Movie'], y=df['Rating'], mode='markers+lines',
                         name='Rating', yaxis='y2'))

# BoxOffice as bars on the base axis, scaled to millions for readable ticks.
fig.add_trace(go.Bar(x=df['Movie'], y=df['BoxOffice']/1e6, name='BoxOffice (M)'))

# Two y-axes sharing one x-axis: Rating stays on the left, BoxOffice on the right.
fig.update_layout(
    title='Rating vs BoxOffice per Movie',
    yaxis=dict(title='BoxOffice (Millions)', side='right'),
    yaxis2=dict(title='Rating', overlaying='y', side='left'),
    xaxis=dict(title='Movie')
)

fig.show()


# MODULE 3: Interactivity Features in Plotly

## Hover → shows extra info.

## Click → you can select/filter points.

## Zoom/Pan → explore detailed data.

## Legends → toggle series on/off.

In [16]:
# Rating vs BoxOffice coloured by actor; hovering also shows movie and genre.
fig = px.scatter(
    data_frame=df,
    x='Rating',
    y='BoxOffice',
    color='Actor',
    hover_data=['Movie', 'Genre'],
)
# Apply the same marker style to every trace: fixed size 15 with a 2px outline.
fig.update_traces(
    marker={
        'size': 15,
        'line': {'width': 2, 'color': 'DarkSlateGrey'},
    }
)
fig.show()