In [1]:
import numpy as np
import pandas as pd
from scipy import stats 

### *EXERCISE 1*

The hourly wages in a particular industry are normally distributed with mean $13.20 and standard deviation $2.50. A company in this industry employs 40 workers, paying them an average of $12.20 per hour. Can this company be accused of paying substandard wages? Use an α = .01 level test.

In [2]:
H_0 = "This company can't  be accused of paying substandard wages?"
H_A = "This company can  be accused of paying substandard wages?"

In [3]:
x_bar = 12.20 # sample mean
mu = 13.20 # population mean
sigma = 2.50 # population std
n = 40 # sample
alpha = 0.01 # significance level

In [4]:
z = (x_bar - mu)/(sigma/np.sqrt(n))
z

-2.5298221281347035

In [5]:
p_value = 1 - stats.norm.cdf(z)
p_value

0.9942939818069991

In [6]:
if p_value<alpha:
    print("Reject The Null Hipothesis")
else:
    print("Fail To Reject The Null Hipothesis")

Fail To Reject The Null Hipothesis


### *EXERCISE 2*

Shear strength measurements derived from unconfined compression tests for two types of soils gave the results shown in the following document (measurements in tons per square foot). Do the soils appear to differ with respect to average shear strength, at the 1% significance level?

In [7]:
soil = pd.read_csv("soil.csv")

In [11]:
soil

Unnamed: 0,Soil1,Soil2
0,1.442,1.364
1,1.943,1.878
2,1.11,1.337
3,1.912,1.828
4,1.553,1.371
5,1.641,1.428
6,1.499,1.119
7,1.347,1.373
8,1.685,1.589
9,1.578,1.714


In [24]:
soil.dropna(inplace=True)

In [26]:
soil.head()

Unnamed: 0,Soil1,Soil2
0,1.442,1.364
1,1.943,1.878
2,1.11,1.337
3,1.912,1.828
4,1.553,1.371


In [48]:
diff = soil.Soil1 - soil.Soil2
diff.head()

0    0.078
1    0.065
2   -0.227
3    0.084
4    0.182
dtype: float64

In [49]:
d_mean = diff.mean()
d_mean

0.28416666666666673

In [50]:
d_std = diff.std()
d_std

0.29312937985356113

In [51]:
n = len(soil)
n

30

In [52]:
t_0 = d_mean/(d_std/np.sqrt(n))
t_0

5.309754126390885

In [54]:
p_value_2 = 2*(1-stats.t.cdf(t_0,n-1))
p_value_2

1.0725240883413534e-05

In [55]:
alpha_2 = 0.01
if p_value_2<alpha_2:
    print("Reject The Null Hipothesis")
else:
    print("Fail To Reject The Null Hipothesis")

Reject The Null Hipothesis


### *EXERCISE 3*

The following dataset is based on data provided by the World Bank (https://datacatalog.worldbank.org/dataset/education-statistics). World Bank Edstats.  2015 PISA Test Dataset

1. Get descriptive statistics (the central tendency, dispersion and shape of a dataset’s distribution) for each continent group (AS, EU, AF, NA, SA, OC).

2. Determine whether there is any difference (on the average) for the math scores among European (EU) and Asian (AS) countries (assume normality and equal variances). Draw side-by-side box plots.

In [20]:
pisa = pd.read_csv("2015 PISA Test.csv")

In [22]:
pisa.head()

Unnamed: 0,Country Code,Continent_Code,internet_users_per_100,Math,Reading,Science
0,ALB,EU,63.252933,413.157,405.2588,427.225
1,ARE,AS,90.5,427.4827,433.5423,436.7311
2,ARG,SA,68.043064,409.0333,425.3031,432.2262
3,AUS,OC,84.560519,493.8962,502.9006,509.9939
4,AUT,EU,83.940142,496.7423,484.8656,495.0375


In [56]:
pisa.groupby("Continent_Code").mean()

Unnamed: 0_level_0,internet_users_per_100,Math,Reading,Science
Continent_Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AF,43.359918,363.2121,355.4574,381.07425
AS,68.455613,466.216647,454.059682,467.945847
EU,77.274888,477.981449,476.164608,478.299381
OC,86.391704,494.55975,506.08565,511.6487
SA,60.180494,402.8877,425.359229,421.747186


In [59]:
AF = pisa[pisa["Continent_Code"]=="AF"]
AF

Unnamed: 0,Country Code,Continent_Code,internet_users_per_100,Math,Reading,Science
19,DZA,AF,38.2,359.6062,349.8593,375.7451
65,TUN,AF,48.519836,366.818,361.0555,386.4034


In [60]:
AS = pisa[pisa["Continent_Code"]=="AS"]
AS

Unnamed: 0,Country Code,Continent_Code,internet_users_per_100,Math,Reading,Science
1,ARE,AS,90.5,427.4827,433.5423,436.7311
11,CHN,AS,50.3,531.2961,493.9412,517.7793
25,GEO,AS,47.56976,403.8332,401.2881,411.1315
27,HKG,AS,84.948353,547.931,526.6753,523.2774
30,IDN,AS,21.976068,386.1096,397.2595,403.0997
33,ISR,AS,77.35209,469.6695,478.9606,466.5528
35,JOR,AS,60.114383,380.259,408.1022,408.6691
36,JPN,AS,91.058028,532.4399,515.9585,538.3948
37,KAZ,AS,72.9,459.816,427.141,456.4836
38,KOR,AS,89.648631,524.1062,517.4367,515.8099


In [61]:
EU = pisa[pisa["Continent_Code"]=="EU"]
EU

Unnamed: 0,Country Code,Continent_Code,internet_users_per_100,Math,Reading,Science
0,ALB,EU,63.252933,413.157,405.2588,427.225
4,AUT,EU,83.940142,496.7423,484.8656,495.0375
5,BEL,EU,85.0529,506.9844,498.5242,501.9997
6,BGR,EU,56.6563,441.1899,431.7175,445.772
9,CHE,EU,87.479056,521.2506,492.1982,505.5058
14,CYP,EU,71.7159,437.1443,442.8443,432.5964
15,CZE,EU,75.668839,492.3254,487.2501,492.83
16,DEU,EU,87.5898,505.9713,509.1041,509.1406
17,DNK,EU,96.3305,511.0876,499.8146,501.9369
20,ESP,EU,78.6896,485.8432,495.5764,492.7861


In [62]:
OC = pisa[pisa["Continent_Code"]=="OC"]
OC

Unnamed: 0,Country Code,Continent_Code,internet_users_per_100,Math,Reading,Science
3,AUS,OC,84.560519,493.8962,502.9006,509.9939
52,NZL,OC,88.222889,495.2233,509.2707,513.3035


In [63]:
SA = pisa[pisa["Continent_Code"]=="SA"]
SA

Unnamed: 0,Country Code,Continent_Code,internet_users_per_100,Math,Reading,Science
2,ARG,SA,68.043064,409.0333,425.3031,432.2262
7,BRA,SA,58.327952,377.0695,407.3486,400.6821
10,CHL,SA,64.289,422.6714,458.5709,446.9561
12,COL,SA,55.904973,389.6438,424.9052,415.7288
53,PER,SA,40.9,386.5606,397.5414,396.6836
64,TTO,SA,69.198471,417.2434,427.2733,424.5905
67,URY,SA,64.6,417.9919,436.5721,435.363
