# Testing Plotly Express

In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
from google.colab import files
import plotly.io as pio


In [3]:
# 1. Download the report from the link below, keeping the same dimensions (any filters may be applied):
#    https://admanager.google.com/8095840#reports/report/create/results_id=13740157286

# 2. Clean the export so that the column names are in row 1, the Total row and the ID columns
#    are removed, and the remaining columns are named exactly:
#     ad_unit, device_cat, month_year, unfilled, total_im, revenue

# 3. Run the command below to upload the cleaned CSV file (google.colab file picker)

uploaded = files.upload()

Saving unfilled_dash.csv to unfilled_dash.csv


In [4]:
# Load the uploaded report; the file name is hard-coded, so the CSV must be called "unfilled_dash.csv".
df = pd.read_csv("unfilled_dash.csv")

In [5]:
# Data cleaning: make the metric columns numeric, turn month_year into a monthly Period and
# drop the device categories that are not of interest.
# The cell is safe to re-run: columns that were already converted are left untouched.
METRIC_COLS = ['unfilled', 'total_im', 'revenue']
EXCLUDED_DEVICES = ['Connected TV', 'Feature phone', 'Set-top box']


def _strip_thousands(col):
    """Return `col` as numbers, removing ',' thousands separators from text values.

    Columns that are already numeric (the CSV had no separators, or this cell was run
    before) are returned unchanged, because the `.str` accessor only works on text data.
    """
    if pd.api.types.is_numeric_dtype(col):
        return col
    return pd.to_numeric(col.str.replace(',', '', regex=False))


for metric in METRIC_COLS:
    df[metric] = _strip_thousands(df[metric])
# Revenue is always kept as float, even when every value happens to be a whole number
df['revenue'] = df['revenue'].astype(float)

# Parse "Apr-22"-style labels only once; on a re-run the column is already a Period
if not isinstance(df['month_year'].dtype, pd.PeriodDtype):
    df['month_year'] = pd.to_datetime(df['month_year'], format='%b-%y').dt.to_period('M')

# Drop rows whose 'device_cat' is 'Connected TV', 'Feature phone' or 'Set-top box',
# then reset the index so it is contiguous again
df = df[~df['device_cat'].isin(EXCLUDED_DEVICES)].reset_index(drop=True)
df

Unnamed: 0,ad_unit,device_cat,month_year,unfilled,total_im,revenue
0,.2_A.40299.4_gsmarena.com_tier1 (22689976488),Desktop,2022-04,94495,1607651,1464.21
1,.2_A.40299.4_gsmarena.com_tier1 (22689976488),Desktop,2022-05,308382,2087808,1930.79
2,.2_A.40299.4_gsmarena.com_tier1 (22689976488),Desktop,2022-06,424510,5683054,5872.45
3,.2_A.40299.4_gsmarena.com_tier1 (22689976488),Desktop,2022-07,545819,5949720,5190.78
4,.2_A.40299.4_gsmarena.com_tier1 (22689976488),Desktop,2022-08,652614,5767744,5650.40
...,...,...,...,...,...,...
1080,.2_A.38135.3_gsmarena.com_tier1 (22151678970),Tablet,2023-04,1,2084,0.01
1081,.2_A.38135.3_gsmarena.com_tier1 (22151678970),Tablet,2023-05,1,0,0.00
1082,.2_A.38136.3_gsmarena.com_tier1 (22151364694),Desktop,2023-02,1,0,0.00
1083,.2_A.38136.3_gsmarena.com_tier1 (22151364694),Tablet,2022-08,2,0,0.00


In [6]:
# Ensure unfilled, total_im and revenue are numeric: fail fast with a clear message if the
# cleaning step did not convert them, then display the dtypes for a visual check.
for metric_col in ("unfilled", "total_im", "revenue"):
    assert pd.api.types.is_numeric_dtype(df[metric_col]), (
        f"{metric_col} is not numeric (dtype: {df[metric_col].dtype})"
    )
df.dtypes

ad_unit          object
device_cat       object
month_year    period[M]
unfilled          int64
total_im          int64
revenue         float64
dtype: object

In [7]:
# Largest value of each numeric column, rounded to whole numbers
# (handy when choosing axis ranges for the scatter plot).
numeric_summary = df.describe()
numeric_summary.loc["max"].round(0)

unfilled    23503402.0
total_im    63720751.0
revenue        26255.0
Name: max, dtype: float64

In [None]:
# Animated bubble chart: x = total impressions, y = unfilled impressions, bubble size = revenue,
# one animation frame per month.
# Dimensions can be adjusted - whether the purpose is to check ad units or devices.
FRAME_DURATION_MS = 2000  # time spent on each month while playing; more -> slower
AXIS_HEADROOM = 1.1       # leave 10% of empty space beyond the largest value on each axis

# Chronological list of month labels. Without an explicit order the frames follow the order
# in which months first appear in the data, which is not guaranteed to be chronological.
month_order = [str(month) for month in sorted(df["month_year"].dropna().unique())]
# Use plain-text month labels (e.g. "2022-04") for frame names and slider labels
plot_df = df.assign(month_year=df["month_year"].astype(str))

fig = px.scatter(
    plot_df,
    x="total_im",
    y="unfilled",
    animation_frame="month_year",
    animation_group="ad_unit",
    size="revenue",
    color="device_cat",
    hover_name="ad_unit",
    category_orders={"month_year": month_order},
    log_x=False,
    size_max=60,
    # Fixed ranges stop the axes from rescaling between frames; they are derived from the data
    # so the chart still fits when the report was downloaded with different filters.
    range_x=[0, float(plot_df["total_im"].max()) * AXIS_HEADROOM],
    range_y=[0, float(plot_df["unfilled"].max()) * AXIS_HEADROOM],
    height=700,
    width=1300,
)

# Change the speed of the animation.
# layout.transition is kept from the original cell, but playback speed is driven by the
# durations stored in the play button's own arguments, so those are updated as well.
fig.update_layout(transition={'duration': FRAME_DURATION_MS})
if fig.layout.updatemenus:  # animation controls only exist when there is more than one frame
    play_args = fig.layout.updatemenus[0].buttons[0].args[1]
    play_args["frame"]["duration"] = FRAME_DURATION_MS
    play_args["transition"]["duration"] = FRAME_DURATION_MS
fig.show()

# If the plot needs to be downloaded so that it can be shared:

In [None]:
# Path of the HTML export (/content is presumably the Colab working directory - TODO confirm)
html_file = "/content/animated_scatter_plot.html"
# Save the plot as a standalone HTML file at that path
fig.write_html(html_file)
# Ask the browser to download the HTML file (google.colab helper, so this only works in Colab)
files.download(html_file)

# Click below to download

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>