In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

#plotly library
import chart_studio.plotly as py
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from plotly import tools
init_notebook_mode(connected=True)

import matplotlib.pyplot as plt

from wordcloud import wordcloud

#from a unix time to a date
from time import strftime
from datetime import datetime

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the advertising dataset; the path is relative to the notebook's
# working directory.
df = pd.read_csv('Advertising And Sales.csv')
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,ID,TV,Radio,Newspaper,Sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,12.8,58.4,12.9
...,...,...,...,...,...
195,196,38.2,3.7,13.8,7.6
196,197,94.2,4.9,8.1,9.7
197,198,177.0,11.0,6.4,12.8
198,199,283.6,42.0,66.2,25.5


In [3]:
# Print the column names, non-null counts, dtypes and memory usage of `df`.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   ID         200 non-null    int64  
 1   TV         200 non-null    float64
 2   Radio      200 non-null    float64
 3   Newspaper  200 non-null    float64
 4   Sales      200 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 7.9 KB


In [4]:
# Number of missing values in each column.
df.isnull().sum()

ID           0
TV           0
Radio        0
Newspaper    0
Sales        0
dtype: int64

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

Unnamed: 0,ID,TV,Radio,Newspaper,Sales
count,200.0,200.0,200.0,200.0,200.0
mean,100.5,147.0325,23.2895,30.554,14.038
std,57.879185,85.885186,14.86997,21.778621,5.2094
min,1.0,0.7,0.0,0.3,1.6
25%,50.75,74.375,10.075,12.75,10.4
50%,100.5,149.75,22.9,25.75,12.9
75%,150.25,218.825,36.525,45.1,17.4
max,200.0,296.4,49.6,114.0,27.0


In [4]:
# Largest value in the Radio column.
df.loc[:, 'Radio'].max()

49.6

In [6]:
# Rows whose Radio value is greater than 45 and at most 50.
radio_betn = df.loc[df['Radio'].gt(45) & df['Radio'].le(50)]

In [7]:
# Line chart of the Radio values selected into `radio_betn`.
#
# x, y and the hover text are all taken from `radio_betn`, so each point keeps
# its own row label and its own TV value. Taking x/text from the full `df`
# would pair the filtered y-values positionally with the first rows of `df`,
# which are unrelated to them.
trace1 = go.Scatter(
    x = radio_betn.index,
    y = radio_betn['Radio'],
    name = 'Radio',
    mode = 'lines',
    marker = dict(color = 'rgba(80,27,80,0.7)'),
    text = radio_betn['TV']  # hover text: TV value of the same row
)

data = [trace1]

layout = dict(title = 'Radio between 45 and 50',
             xaxis = dict(title = 'index', ticklen = 5, zeroline = False))

# A plain dict with `data` and `layout` keys is passed straight to iplot.
fig = dict(data = data, layout = layout)
iplot(fig)

# Bar plot

In [8]:
# Row subsets used by the bar chart: Newspaper above 100 and Sales above 20.
news_greater_100 = df.loc[df['Newspaper'].gt(100)]
sales_greater_20 = df.loc[df['Sales'].gt(20)]

In [9]:
# Grouped bar chart with Newspaper on the x-axis.
#   trace2: Radio values for the rows in `news_greater_100`
#   trace3: TV values for the rows in `sales_greater_20`

trace2 = go.Bar(
    x = news_greater_100['Newspaper'],
    y = news_greater_100['Radio'],
    name = "Radio",
    marker=dict(color='rgba(255, 58, 255, 0.4)',
                line=dict(color='rgb(0,0,0)', width=1.5)),
    text = news_greater_100['ID']
)

trace3 = go.Bar(
    x = sales_greater_20['Newspaper'],
    y = sales_greater_20['TV'],
    name = "TV",
    marker=dict(color='rgba(15, 15, 250, 0.4)',
                line=dict(color='rgb(0,0,0)', width=1.5)),
    # Hover text must come from the same frame as x and y; using
    # `news_greater_100` here would attach Sales values of other rows.
    text = sales_greater_20['Sales']
)

# Combine both traces into one figure
data2 = [trace2, trace3]

# NOTE(review): the y-axis title says "Count", but the bars show Radio / TV
# column values -- confirm the intended label.
layout = go.Layout(
    barmode = "group",
    title = "Radio and TV bar",
    xaxis = dict(title = "Newspaper"),
    yaxis = dict(title = "Count"),
)

fig = go.Figure(data = data2, layout = layout)
iplot(fig)

In [10]:
# Re-select the two row subsets used by the bars.
news_greater_100 = df.loc[lambda frame: frame['Newspaper'] > 100]
sales_greater_20 = df.loc[lambda frame: frame['Sales'] > 20]

# Radio values of the high-Newspaper rows; hover text is the row ID.
trace2 = go.Bar(
    name="Radio",
    x=news_greater_100['Newspaper'],
    y=news_greater_100['Radio'],
    text=news_greater_100['ID'],
    marker={
        'color': 'rgba(255, 58, 255, 0.4)',
        'line': {'color': 'rgb(0,0,0)', 'width': 1.5},
    },
)

# TV values of the high-Sales rows; hover text is the Sales value.
trace3 = go.Bar(
    name="TV",
    x=sales_greater_20['Newspaper'],
    y=sales_greater_20['TV'],
    text=sales_greater_20['Sales'],
    marker={
        'color': 'rgba(15, 15, 250, 0.4)',
        'line': {'color': 'rgb(0,0,0)', 'width': 1.5},
    },
)

# Both traces are drawn side by side (grouped) in one figure.
data2 = [trace2, trace3]

layout = go.Layout(
    title="Radio and TV bar",
    barmode="group",
    xaxis={'title': "Newspaper"},
    yaxis={'title': "Count"},
)

fig = go.Figure(data=data2, layout=layout)
iplot(fig)

In [11]:
# 3D scatter of TV, Radio and Sales for the 20 rows with the highest
# Newspaper value.
data_sorted = df.sort_values(by='Newspaper', ascending=False)
# `.copy()` makes `df_20` an independent frame rather than a slice of
# `data_sorted`. The TV column is deliberately left untouched: overwriting it
# with 1..20 would make the axis labelled "TV" show a rank, not TV values.
df_20 = data_sorted.iloc[:20, :].copy()

# Generating 3D scatter plot
trace1 = go.Scatter3d(
    x=df_20['TV'],
    y=df_20['Radio'],
    z=df_20['Sales'],
    mode='markers',
    marker=dict(
        size=12,
        color=df_20['Sales'],  # colour encodes the same value as the z-axis
        colorscale='Viridis',
        opacity=0.8
    ),
    text=df_20['ID']  # hover text: row ID
)

data = [trace1]
layout = go.Layout(
    title="3D Scatter Plot of Sales, Radio, and TV",
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=40  # keep some top margin so the title has room to render
    ),
    scene=dict(
        xaxis_title='TV',
        yaxis_title='Radio',
        zaxis_title='Sales'
    )
)

fig = go.Figure(data=data, layout=layout)

fig.show()

# Scatterplot matrix

In [12]:
import plotly.figure_factory as ff

# Two-column frame (Radio, TV) plus a 1-based running 'index' column that the
# scatterplot matrix uses as its index variable.
data_radio_and_tv = df.loc[:, ["Radio", "TV"]].assign(
    index=np.arange(1, len(df) + 1)
)

# Scatterplot matrix with box plots on the diagonal.
fig = ff.create_scatterplotmatrix(
    data_radio_and_tv,
    index='index',
    diag='box',
    colormap='Portland',
    colormap_type='cat',
    width=700,
    height=700,
)

iplot(fig)