# The aim of this mini project is to predict sales from the advertising budgets spent on TV, radio and newspaper ads.
# We want to know how the combination of these advertising media affects sales.

In [1]:
import pandas as pd

In [2]:
# Load the advertising dataset from the CSV file in the working directory
# and display the resulting frame.
advert = pd.read_csv("advertising.csv")
advert

Unnamed: 0.1,Unnamed: 0,TV Ad Budget ($),Radio Ad Budget ($),Newspaper Ad Budget ($),Sales ($)
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9
...,...,...,...,...,...
195,196,38.2,3.7,13.8,7.6
196,197,94.2,4.9,8.1,9.7
197,198,177.0,9.3,6.4,12.8
198,199,283.6,42.0,66.2,25.5


In [3]:
# Lift pandas' row limit so that frames are rendered in full (None = no limit).
pd.options.display.max_rows = None

In [4]:
# Inspect the column labels of the loaded frame.
advert.columns

Index(['Unnamed: 0', 'TV Ad Budget ($)', 'Radio Ad Budget ($)',
       'Newspaper Ad Budget ($)', 'Sales ($)'],
      dtype='object')

In [5]:
# Show the frame again now that the display.max_rows limit has been lifted.
advert

Unnamed: 0.1,Unnamed: 0,TV Ad Budget ($),Radio Ad Budget ($),Newspaper Ad Budget ($),Sales ($)
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9
5,6,8.7,48.9,75.0,7.2
6,7,57.5,32.8,23.5,11.8
7,8,120.2,19.6,11.6,13.2
8,9,8.6,2.1,1.0,4.8
9,10,199.8,2.6,21.2,10.6


In [6]:
# Drop the unwanted "Unnamed: 0" column.
# drop(..., errors="ignore") keeps this cell idempotent: the original
# `del advert["Unnamed: 0"]` raised KeyError when the cell was run a second
# time, because the column was already gone.
advert = advert.drop(columns=["Unnamed: 0"], errors="ignore")

In [7]:
# Count the missing values in each column.
advert.isna().sum()


TV Ad Budget ($)           0
Radio Ad Budget ($)        0
Newspaper Ad Budget ($)    0
Sales ($)                  0
dtype: int64

In [8]:
# Correlation of every column with sales, i.e. how strongly each
# advertising budget moves together with sales.
advert.corr().loc[:, "Sales ($)"]

TV Ad Budget ($)           0.782224
Radio Ad Budget ($)        0.576223
Newspaper Ad Budget ($)    0.228299
Sales ($)                  1.000000
Name: Sales ($), dtype: float64

In [9]:
# Separate the predictors (every column except sales) from the target (sales).
x = advert.drop(columns=["Sales ($)"])
y = advert.loc[:, "Sales ($)"]

In [10]:
#import train_test_split to split the data into training and test sets

from sklearn.model_selection import train_test_split


In [11]:
# Split into training and test sets; random_state=0 pins the shuffle so the
# split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

In [12]:
#import svr
from sklearn.svm import SVR


In [13]:
# Fit a support vector regressor (SVR) with its default hyperparameters on the training split.
# NOTE(review): the features are passed unscaled; SVR is generally sensitive to
# feature scale, so standardising the inputs may be worth trying - confirm.
model = SVR().fit(X=x_train, y=y_train)

In [14]:
# Predict sales for the held-out test rows.
predict = model.predict(X=x_test)

In [15]:
#import r2_score to measure how well the model fits (R², the coefficient of determination)
from sklearn.metrics import r2_score

In [16]:
# R² on the test set, expressed as a percentage. A value of about 84 means the
# model explains roughly 84% of the variance in test-set sales; R² is a
# goodness-of-fit measure, not a classification accuracy.
100 * r2_score(y_true=y_test, y_pred=predict)

84.18832850073463

In [17]:
# Silence only the feature-name warning instead of every warning, so that
# unrelated problems (deprecations, convergence issues, ...) stay visible for
# the rest of the session.
# NOTE(review): the original blanket filter was presumably added to hide
# scikit-learn's "X does not have valid feature names" warning - confirm.
import warnings
warnings.filterwarnings(
    "ignore",
    message="X does not have valid feature names",
    category=UserWarning,
)

# Predict sales for a budget of 280 on TV, 20.1 on radio and 100.5 on newspaper ads.
# The values are wrapped in a DataFrame that reuses the training columns, so the
# input carries the same feature names (and order) the model was fitted with.
model.predict(pd.DataFrame([[280, 20.1, 100.5]], columns=x.columns))

array([18.35566168])

In [36]:
# Second check: predict sales for a budget of 300 on TV, 10.2 on radio and
# 90.5 on newspaper ads.
# The values are wrapped in a DataFrame that reuses the training columns, so the
# input carries the same feature names (and order) the model was fitted with.
model.predict(pd.DataFrame([[300, 10.2, 90.5]], columns=x.columns))


array([17.47886414])