# Ad_Analysis

## 1.0 Importing Essential Libraries

In [62]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt  # pyplot (not the top-level package) provides the plt.* plotting API

from sklearn.cluster import KMeans

%matplotlib inline

## 2.0 Loading dataset

In [4]:
# Load the ad performance table from CSV and preview the first five rows
ad_df = pd.read_csv('ad_table.csv')
ad_df.head()

Unnamed: 0,date,shown,clicked,converted,avg_cost_per_click,total_revenue,ad
0,10/1/15,65877,2339,43,0.9,641.62,ad_group_1
1,10/2/15,65100,2498,38,0.94,756.37,ad_group_1
2,10/3/15,70658,2313,49,0.86,970.9,ad_group_1
3,10/4/15,69809,2833,51,1.01,907.39,ad_group_1
4,10/5/15,68186,2696,41,1.0,879.45,ad_group_1


## 3.0 Exploring The Data

In [5]:
# Dimensions of the dataset as (number of rows, number of columns)
ad_df.shape

(2115, 7)

In [6]:
# Column names, non-null counts, dtypes and memory usage of the dataset
ad_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2115 entries, 0 to 2114
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   date                2115 non-null   object 
 1   shown               2115 non-null   int64  
 2   clicked             2115 non-null   int64  
 3   converted           2115 non-null   int64  
 4   avg_cost_per_click  2115 non-null   float64
 5   total_revenue       2115 non-null   float64
 6   ad                  2115 non-null   object 
dtypes: float64(2), int64(3), object(2)
memory usage: 115.8+ KB


In [7]:
# Summary statistics of the numeric columns, transposed so each metric is a row
ad_df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
shown,2115.0,68299.844444,48884.821409,0.0,28030.5,54029.0,97314.5,192507.0
clicked,2115.0,3056.077069,3783.579969,0.0,744.0,1392.0,3366.5,20848.0
converted,2115.0,126.453901,233.420826,0.0,18.0,41.0,103.0,1578.0
avg_cost_per_click,2115.0,1.373749,0.754331,0.0,0.76,1.4,1.92,4.19
total_revenue,2115.0,1966.517589,3942.018757,-200.15,235.47,553.3,1611.495,39623.71


In [8]:
# Count the missing values in each column

# The counts below are all zero, i.e. the dataset has no missing values

ad_df.isna().sum()

date                  0
shown                 0
clicked               0
converted             0
avg_cost_per_click    0
total_revenue         0
ad                    0
dtype: int64

In [19]:
# Average of every numeric metric per ad group.
# numeric_only=True skips the non-numeric 'date' column; without it,
# pandas >= 2.0 tries to average the date strings and raises a TypeError.

ad_df.groupby('ad').mean(numeric_only=True)

Unnamed: 0_level_0,shown,clicked,converted,avg_cost_per_click,total_revenue
ad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ad_group_1,69345.566038,2705.830189,47.113208,0.971321,743.015849
ad_group_10,117701.365385,1671.115385,19.538462,1.166154,299.612115
ad_group_11,18713.226415,1142.735849,55.377358,1.620566,767.024906
ad_group_12,28624.339623,1710.981132,156.509434,2.009623,2439.359245
ad_group_13,155424.113208,13319.641509,1259.264151,1.674717,19904.943962
ad_group_14,8461.096154,253.211538,20.961538,0.709231,320.639615
ad_group_15,15539.566038,356.566038,16.660377,0.80283,253.58566
ad_group_16,29595.075472,788.0,69.849057,0.542642,1056.845849
ad_group_17,139477.811321,1228.962264,14.358491,0.396792,189.874717
ad_group_18,89124.346154,8819.634615,616.134615,2.000192,10052.245769


In [29]:
# It is clear from the table below that some ads convert far better than others.
# The column is selected before aggregating so that only 'converted' is averaged;
# averaging every column first fails on the non-numeric 'date' column in pandas >= 2.0.

ad_df.groupby('ad')[['converted']].mean()

Unnamed: 0_level_0,converted
ad,Unnamed: 1_level_1
ad_group_1,47.113208
ad_group_10,19.538462
ad_group_11,55.377358
ad_group_12,156.509434
ad_group_13,1259.264151
ad_group_14,20.961538
ad_group_15,16.660377
ad_group_16,69.849057
ad_group_17,14.358491
ad_group_18,616.134615


# Challenge Question - 1

      1. If you had to identify the 5 best ad groups, which ones would they be? Which metric did
    you choose to identify the best ad groups? Why? Explain the pros of your metric as well
    as the possible cons.

### Metric Used

    * The best five ad groups can be identified using various metrics. For this analysis, I use the profit an ad group generates relative to the number of times it is shown, since the purpose of an ad is ultimately to make a profit.

    * Hence, the profit is computed as the total revenue obtained from the ad minus the total amount the company paid for clicks (average cost per click multiplied by the number of clicks).

    * Profit per view = (Total Revenue - (Average cost per click * Number of times the ad is clicked)) / Number of times the ad is shown

### Pros and Cons

    * Pros 
        - The metric fully considers the money involved (revenue and cost) with each ad 
        - Prioritizes the direct purpose of ads: to make a profit

    * Cons 
        - The metric doesn't consider the conversion rate
        - The metric might be biased by the number of times an ad is shown to users and by its cost per click


In [49]:
# Profit per row = total revenue minus total click cost (avg_cost_per_click * clicked).
# This is the raw profit; it is not yet divided by the number of times the ad was shown.

ad_df['profit'] = (ad_df['total_revenue'] - (ad_df['avg_cost_per_click']*ad_df['clicked']))
ad_df.head()

Unnamed: 0,date,shown,clicked,converted,avg_cost_per_click,total_revenue,ad,profit
0,10/1/15,65877,2339,43,0.9,641.62,ad_group_1,-1463.48
1,10/2/15,65100,2498,38,0.94,756.37,ad_group_1,-1591.75
2,10/3/15,70658,2313,49,0.86,970.9,ad_group_1,-1018.28
3,10/4/15,69809,2833,51,1.01,907.39,ad_group_1,-1953.94
4,10/5/15,68186,2696,41,1.0,879.45,ad_group_1,-1816.55


In [52]:
# Dimensions after adding the 'profit' column (one more column than the raw data)
ad_df.shape

(2115, 8)

In [57]:
# Per ad group: total profit and total number of times the ad was shown.
# The columns are selected with a list; tuple-style selection
# (groupby('ad')['profit', 'shown']) was deprecated and fails in pandas >= 2.0.

ad_grouped = ad_df.groupby('ad')[['profit', 'shown']].sum()
ad_grouped_df = pd.DataFrame(ad_grouped)
ad_grouped_df

Unnamed: 0_level_0,profit,shown
ad,Unnamed: 1_level_1,Unnamed: 2_level_1
ad_group_1,-100802.69,3675315
ad_group_10,-85808.3,6120471
ad_group_11,-65985.38,991801
ad_group_12,-60901.57,1517090
ad_group_13,-182233.01,8237478
ad_group_14,7252.95,439977
ad_group_15,-1888.62,823597
ad_group_16,32179.55,1568539
ad_group_17,-15850.14,7392324
ad_group_18,-400822.23,4634466


In [59]:
# Profit earned by each ad group per single time the ad is shown
ad_grouped_df['profit_per_view'] = ad_grouped_df['profit'].div(ad_grouped_df['shown'])

# Rank the ad groups from highest to lowest profit per view
ad_grouped_df = ad_grouped_df.sort_values('profit_per_view', ascending=False)

ad_grouped_df

Unnamed: 0_level_0,profit,shown,profit_per_view
ad,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ad_group_16,32179.55,1568539,0.020516
ad_group_2,54456.44,2707080,0.020116
ad_group_14,7252.95,439977,0.016485
ad_group_31,40265.93,6914228,0.005824
ad_group_27,2528.73,3487872,0.000725
ad_group_9,-441.44,6417400,-6.9e-05
ad_group_36,-1987.58,3552407,-0.00056
ad_group_17,-15850.14,7392324,-0.002144
ad_group_15,-1888.62,823597,-0.002293
ad_group_25,-23321.96,9054932,-0.002576


In [61]:
# Top five ad groups by profit per view (the frame is already sorted in descending order)
ad_grouped_df['profit_per_view'].head(5)

ad
ad_group_16    0.020516
ad_group_2     0.020116
ad_group_14    0.016485
ad_group_31    0.005824
ad_group_27    0.000725
Name: profit_per_view, dtype: float64

# Challenge Question 2

*     Cluster ads into 3 groups: the ones whose avg_cost_per_click is going up, the ones
        whose avg_cost_per_click is flat and the ones whose avg_cost_per_click is going down.

In [None]:
# Clustring clicks per cost into three classes of increasing 