In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from scipy.stats import ttest_ind

In [3]:
sales=pd.read_csv('sales_add.csv')
sales.head()

Unnamed: 0,Month,Region,Manager,Sales_before_digital_add(in $),Sales_After_digital_add(in $)
0,Month-1,Region - A,Manager - A,132921,270390
1,Month-2,Region - A,Manager - C,149559,223334
2,Month-3,Region - B,Manager - A,146278,244243
3,Month-4,Region - B,Manager - B,152167,231808
4,Month-5,Region - C,Manager - B,159525,258402


In [4]:
sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 5 columns):
 #   Column                          Non-Null Count  Dtype 
---  ------                          --------------  ----- 
 0   Month                           22 non-null     object
 1   Region                          22 non-null     object
 2   Manager                         22 non-null     object
 3   Sales_before_digital_add(in $)  22 non-null     int64 
 4   Sales_After_digital_add(in $)   22 non-null     int64 
dtypes: int64(2), object(3)
memory usage: 1008.0+ bytes


# Effect of digital marketing in sales

In [5]:
sales.describe()

Unnamed: 0,Sales_before_digital_add(in $),Sales_After_digital_add(in $)
count,22.0,22.0
mean,149239.954545,231123.727273
std,14844.042921,25556.777061
min,130263.0,187305.0
25%,138087.75,214960.75
50%,147444.0,229986.5
75%,157627.5,250909.0
max,178939.0,276279.0


In [6]:
tval,pval=ttest_ind(sales['Sales_After_digital_add(in $)'],sales['Sales_before_digital_add(in $)'])
print('p value:',pval)
if pval <0.05:
  print(" Null hypothesis is rejected \n","Digital marketing improves sales")
else:
  print(" Null hypothesis is accepted \n","Digital marketing has no effect")

p value: 2.614368006904645e-16
 Null hypothesis is rejected 
 Digital marketing improves sales


# dependency between the features “Region” and “Manager”.

In [7]:
contingency_table=pd.crosstab(sales["Region"],sales["Manager"])
print('contingency_table :-\n',contingency_table)

contingency_table :-
 Manager     Manager - A  Manager - B  Manager - C
Region                                           
Region - A            4            3            3
Region - B            4            1            2
Region - C            1            3            1


In [8]:
Observed_Values = contingency_table.values 
print("Observed Values :-\n",Observed_Values)

Observed Values :-
 [[4 3 3]
 [4 1 2]
 [1 3 1]]


In [9]:
b=stats.chi2_contingency(contingency_table)
Expected_Values = b[3]
print("Expected Values :-\n",Expected_Values)

Expected Values :-
 [[4.09090909 3.18181818 2.72727273]
 [2.86363636 2.22727273 1.90909091]
 [2.04545455 1.59090909 1.36363636]]


In [10]:
from scipy.stats import chi2
chi_square=sum([(o-e)**2./e for o,e in zip(Observed_Values,Expected_Values)])
chi_square_statistic=chi_square[0]+chi_square[1]
print("chi-square statistic:-",chi_square_statistic)
no_of_rows=len(contingency_table.iloc[0:2,0])
no_of_columns=len(contingency_table.iloc[0,0:2])
ddof=(no_of_rows-1)*(no_of_columns-1)
print("Degree of Freedom:-",ddof)
alpha = 0.05

chi-square statistic:- 2.921995464852608
Degree of Freedom:- 1


In [11]:
critical_value=chi2.ppf(q=1-alpha,df=ddof)
print('critical_value:',critical_value)

critical_value: 3.841458820694124


In [12]:
p_value=1-chi2.cdf(x=chi_square_statistic,df=ddof)
print('p-value:',p_value)
print('Significance level: ',alpha)
print('Degree of Freedom: ',ddof)
print('chi-square statistic:',chi_square_statistic)
print('critical_value:',critical_value)
print('p-value:',p_value)
if chi_square_statistic>=critical_value:
    print("Reject null hypothesis,There is a relationship between 2 categorical variables")
else:
    print("Accept null hypothesis,There is no relationship between 2 categorical variables")
    
if p_value<=alpha:
    print("Reject null hypothesis,There is a relationship between 2 categorical variables")
else:
    print("Accept null hypothesis,There is no relationship between 2 categorical variables")

p-value: 0.08737974495299139
Significance level:  0.05
Degree of Freedom:  1
chi-square statistic: 2.921995464852608
critical_value: 3.841458820694124
p-value: 0.08737974495299139
Accept null hypothesis,There is no relationship between 2 categorical variables
Accept null hypothesis,There is no relationship between 2 categorical variables
