Skip to content

Commit d5b1def

Browse files
committed
Changes
1 parent d08b8d6 commit d5b1def

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1412
-977
lines changed

Chapter02/Activities/Activity_01.py

Lines changed: 14 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,17 @@
1-
# Activity 1: Generating predictions and evaluating performance of multiple linear regression model
1+
# Activity 1: Line Plot
22

3-
# continuing from Exercise 4:
3+
# Create x
4+
x = ['January','February','March','April','May','June']
5+
print(x)
46

5-
# generate predictions on the test data
6-
predictions = model.predict(X_test)
7+
# Create y
8+
y = [1000, 1200, 1400, 1600, 1800, 2000]
9+
print(y)
710

8-
# plot correlation of predicted and actual values
9-
import matplotlib.pyplot as plt
10-
from scipy.stats import pearsonr
11-
plt.scatter(y_test, predictions)
12-
plt.xlabel('Y Test (True Values)')
13-
plt.ylabel('Predicted Values')
14-
plt.title('Predicted vs. Actual Values (r = {0:0.2f})'.format(pearsonr(y_test, predictions)[0], 2))
15-
plt.show()
16-
17-
# plot distribution of residuals
18-
import seaborn as sns
19-
from scipy.stats import shapiro
20-
sns.distplot((y_test - predictions), bins = 50)
21-
plt.xlabel('Residuals')
22-
plt.ylabel('Density')
23-
plt.title('Histogram of Residuals (Shapiro W p-value = {0:0.3f})'.format(shapiro(y_test - predictions)[1]))
24-
plt.show()
25-
26-
# compute metrics and put into a dataframe
27-
from sklearn import metrics
28-
import numpy as np
29-
metrics_df = pd.DataFrame({'Metric': ['MAE',
30-
'MSE',
31-
'RMSE',
32-
'R-Squared'],
33-
'Value': [metrics.mean_absolute_error(y_test, predictions),
34-
metrics.mean_squared_error(y_test, predictions),
35-
np.sqrt(metrics.mean_squared_error(y_test, predictions)),
36-
metrics.explained_variance_score(y_test, predictions)]}).round(3)
37-
print(metrics_df)
11+
# Create the plot
12+
import matplotlib.pyplot as plt # import matplotlib
13+
plt.plot(x, y, '*:b') # plot items sold (y) by month (x)
14+
plt.xlabel('Month') # label x-axis
15+
plt.ylabel('Items Sold') # label y-axis
16+
plt.title('Items Sold has been Increasing Linearly') # add plot title
17+
plt.show() # display the plot

Chapter02/Activities/Activity_02.py

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,43 @@
1-
# Activity 2: Evaluating tuned model performance
1+
# Activity 2: Bar plot
22

3-
# continuing from Exercise 7:
3+
# Create a list for x
4+
x = ['Boston Celtics','Los Angeles Lakers', 'Chicago Bulls', 'Golden State Warriors', 'San Antonio Spurs']
5+
print(x)
46

5-
# generate predicted probabilities of yes
6-
predicted_prob = model.predict_proba(X_test)[:,1]
7+
# Create a list for y
8+
y = [17, 16, 6, 6, 5]
9+
print(y)
710

8-
# generate predicted classes
9-
predicted_class = model.predict(X_test)
11+
# Put into a data frame so we can sort them
12+
import pandas as pd
13+
df = pd.DataFrame({'Team': x,
14+
'Titles': y})
1015

11-
# evaluate performance with confusion matrix
12-
from sklearn.metrics import confusion_matrix
13-
import numpy as np
14-
cm = pd.DataFrame(confusion_matrix(y_test, predicted_class))
15-
cm['Total'] = np.sum(cm, axis=1)
16-
cm = cm.append(np.sum(cm, axis=0), ignore_index=True)
17-
cm.columns = ['Predicted No', 'Predicted Yes', 'Total']
18-
cm = cm.set_index([['Actual No', 'Actual Yes', 'Total']])
19-
print(cm)
16+
# Sort df by titles
17+
df_sorted = df.sort_values(by=('Titles'), ascending=False)
2018

21-
# generate a classification report
22-
from sklearn.metrics import classification_report
23-
print(classification_report(y_test, predicted_class))
19+
# Make a programmatic title
20+
team_with_most_titles = df_sorted['Team'][0] # get team with most titles
21+
most_titles = df_sorted['Titles'][0] # get the number of max titles
22+
title = 'The {} have the most titles with {}'.format(team_with_most_titles, most_titles) # create title
23+
print(title)
24+
25+
# Plot it
26+
import matplotlib.pyplot as plt # import matplotlib
27+
plt.bar(df_sorted['Team'], df_sorted['Titles'], color='red') # plot titles by team and make bars red
28+
plt.xlabel('Team') # create x label
29+
plt.ylabel('Number of Championships') # create y label
30+
plt.xticks(rotation=45) # rotate x tick labels 45 degrees
31+
plt.title(title) # title
32+
plt.savefig('Titles_by_Team') # save figure to present working directory
33+
plt.show() # display the plot
34+
35+
# Fix the cropping
36+
import matplotlib.pyplot as plt
37+
plt.bar(df_sorted['Team'], df_sorted['Titles'], color='red')
38+
plt.xlabel('Team')
39+
plt.ylabel('Number of Championships')
40+
plt.xticks(rotation=45)
41+
plt.title(title)
42+
plt.savefig('Titles_by_Team', bbox_inches='tight') # fix the cropping issue
43+
plt.show()

Chapter02/Activities/Activity_03.py

Lines changed: 167 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,171 @@
1-
# Activity 3: Generating predictions and evaluating performance of grid search SVC model
1+
# Activity 3: Multiple Plot Types using Subplots
22

3-
# continuing from Exercise 9:
3+
# import Items_Sold_by_Week.csv
4+
import pandas as pd
5+
Items_by_Week = pd.read_csv('Items_Sold_by_Week.csv')
46

5-
# generate predicted classes
6-
predicted_class = model.predict(X_test_scaled)
7+
# For scatterplot
8+
# import Weight_by_Height.csv
9+
import pandas as pd
10+
Weight_by_Height = pd.read_csv('Weight_by_Height.csv')
711

8-
# evaluate performance with confusion matrix
9-
from sklearn.metrics import confusion_matrix
12+
# For histogram and Box-and-Whisker
13+
# Create an array of 100 normally distributed numbers
1014
import numpy as np
11-
cm = pd.DataFrame(confusion_matrix(y_test, predicted_class))
12-
cm['Total'] = np.sum(cm, axis=1)
13-
cm = cm.append(np.sum(cm, axis=0), ignore_index=True)
14-
cm.columns = ['Predicted No', 'Predicted Yes', 'Total']
15-
cm = cm.set_index([['Actual No', 'Actual Yes', 'Total']])
16-
print(cm)
17-
18-
# generate a classification report
19-
from sklearn.metrics import classification_report
20-
print(classification_report(y_test, predicted_class))
15+
y = np.random.normal(loc=0, scale=0.1, size=100) # 100 numbers with mean of 0 and standard deviation of 0.1
16+
17+
# generate figure with 6 subplots organized in 3 rows and 2 columns that do not overlap
18+
import matplotlib.pyplot as plt
19+
fig, axes = plt.subplots(nrows=3, ncols=2)
20+
plt.tight_layout() # prevent plot overlap
21+
22+
# Name the titles
23+
import matplotlib.pyplot as plt
24+
fig, axes = plt.subplots(nrows=3, ncols=2)
25+
# line plot (top left)
26+
axes[0,0].set_title('Line')
27+
# Bar plot (top right)
28+
axes[0,1].set_title('Bar')
29+
# Horizontal bar plot (middle left)
30+
axes[1,0].set_title('Horizontal Bar')
31+
# Histogram (middle right)
32+
axes[1,1].set_title('Histogram')
33+
# Scatterplot (bottom left)
34+
axes[2,0].set_title('Scatter')
35+
# Box-and-Whisker
36+
axes[2,1].set_title('Box-and-Whisker')
37+
plt.tight_layout() # prevent plot overlap
38+
39+
# in the 'Line', 'Bar', and 'Horizontal Bar' axes, plot 'Items_Sold' by 'Week' from the 'Items_by_Week' data frame
40+
# Horizontal bar
41+
import matplotlib.pyplot as plt
42+
fig, axes = plt.subplots(nrows=3, ncols=2)
43+
# line plot (top left)
44+
axes[0,0].plot(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for line plot
45+
axes[0,0].set_title('Line')
46+
# Bar plot (top right)
47+
axes[0,1].bar(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for bar plot
48+
axes[0,1].set_title('Bar')
49+
# Horizontal bar plot (middle left)
50+
axes[1,0].barh(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for horizontal bar plot
51+
axes[1,0].set_title('Horizontal Bar')
52+
# Histogram (middle right)
53+
axes[1,1].set_title('Histogram')
54+
# Scatterplot (bottom left)
55+
axes[2,0].set_title('Scatter')
56+
# Box-and-Whisker
57+
axes[2,1].set_title('Box-and-Whisker')
58+
plt.tight_layout() # prevent plot overlap
59+
60+
# in the 'Histogram' and 'Box-and-Whisker' axes, plot the normally distributed array y
61+
# Horizontal bar
62+
import matplotlib.pyplot as plt
63+
fig, axes = plt.subplots(nrows=3, ncols=2)
64+
# line plot (top left)
65+
axes[0,0].plot(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for line plot
66+
axes[0,0].set_title('Line')
67+
# Bar plot (top right)
68+
axes[0,1].bar(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for bar plot
69+
axes[0,1].set_title('Bar')
70+
# Horizontal bar plot (middle left)
71+
axes[1,0].barh(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for horizontal bar plot
72+
axes[1,0].set_title('Horizontal Bar')
73+
# Histogram (middle right)
74+
axes[1,1].hist(y, bins=20)
75+
axes[1,1].set_title('Histogram')
76+
# Box-and-Whisker drawn on the bottom-right axes; the Scatter axes (bottom left) is only titled in this step
77+
axes[2,1].boxplot(y)
78+
axes[2,0].set_title('Scatter')
79+
# Box-and-Whisker
80+
axes[2,1].set_title('Box-and-Whisker')
81+
plt.tight_layout() # prevent plot overlap
82+
83+
# add scatterplot
84+
import matplotlib.pyplot as plt
85+
fig, axes = plt.subplots(nrows=3, ncols=2)
86+
# line plot (top left)
87+
axes[0,0].plot(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for line plot
88+
axes[0,0].set_title('Line')
89+
# Bar plot (top right)
90+
axes[0,1].bar(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for bar plot
91+
axes[0,1].set_title('Bar')
92+
# Horizontal bar plot (middle left)
93+
axes[1,0].barh(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for horizontal bar plot
94+
axes[1,0].set_title('Horizontal Bar')
95+
# Histogram (middle right)
96+
axes[1,1].hist(y, bins=20) # for histogram
97+
axes[1,1].set_title('Histogram')
98+
# Scatterplot (bottom left)
99+
axes[2,0].scatter(Weight_by_Height['Height'], Weight_by_Height['Weight']) # for scatterplot
100+
axes[2,0].set_title('Scatter')
101+
# Box-and-Whisker
102+
axes[2,1].boxplot(y) # for Box-and-Whisker
103+
axes[2,1].set_title('Box-and-Whisker')
104+
plt.tight_layout() # prevent plot overlap
105+
106+
# Set x- and y-axis for each subplot
107+
import matplotlib.pyplot as plt
108+
fig, axes = plt.subplots(nrows=3, ncols=2)
109+
# line plot (top left)
110+
axes[0,0].plot(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for line plot
111+
axes[0,0].set_xlabel('Week')
112+
axes[0,0].set_ylabel('Items Sold')
113+
axes[0,0].set_title('Line')
114+
# Bar plot (top right)
115+
axes[0,1].bar(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for bar plot
116+
axes[0,1].set_xlabel('Week')
117+
axes[0,1].set_ylabel('Items Sold')
118+
axes[0,1].set_title('Bar')
119+
# Horizontal bar plot (middle left)
120+
axes[1,0].barh(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for horizontal bar plot
121+
axes[1,0].set_xlabel('Items Sold')
122+
axes[1,0].set_ylabel('Week')
123+
axes[1,0].set_title('Horizontal Bar')
124+
# Histogram (middle right)
125+
axes[1,1].hist(y, bins=20) # for histogram
126+
axes[1,1].set_xlabel('y')
127+
axes[1,1].set_ylabel('Frequency')
128+
axes[1,1].set_title('Histogram')
129+
# Scatterplot (bottom left)
130+
axes[2,0].scatter(Weight_by_Height['Height'], Weight_by_Height['Weight']) # for scatterplot
131+
axes[2,0].set_xlabel('Height')
132+
axes[2,0].set_ylabel('Weight')
133+
axes[2,0].set_title('Scatter')
134+
# Box-and-Whisker
135+
axes[2,1].boxplot(y) # for Box-and-Whisker
136+
axes[2,1].set_title('Box-and-Whisker')
137+
plt.tight_layout() # prevent plot overlap
138+
139+
# Enlarge the figure size and Save the figure
140+
import matplotlib.pyplot as plt
141+
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(8,8)) # for figure size
142+
# line plot (top left)
143+
axes[0,0].plot(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for line plot
144+
axes[0,0].set_xlabel('Week')
145+
axes[0,0].set_ylabel('Items Sold')
146+
axes[0,0].set_title('Line')
147+
# Bar plot (top right)
148+
axes[0,1].bar(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for bar plot
149+
axes[0,1].set_xlabel('Week')
150+
axes[0,1].set_ylabel('Items Sold')
151+
axes[0,1].set_title('Bar')
152+
# Horizontal bar plot (middle left)
153+
axes[1,0].barh(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for horizontal bar plot
154+
axes[1,0].set_xlabel('Items Sold')
155+
axes[1,0].set_ylabel('Week')
156+
axes[1,0].set_title('Horizontal Bar')
157+
# Histogram (middle right)
158+
axes[1,1].hist(y, bins=20) # for histogram
159+
axes[1,1].set_xlabel('y')
160+
axes[1,1].set_ylabel('Frequency')
161+
axes[1,1].set_title('Histogram')
162+
# Scatterplot (bottom left)
163+
axes[2,0].scatter(Weight_by_Height['Height'], Weight_by_Height['Weight']) # for scatterplot
164+
axes[2,0].set_xlabel('Height')
165+
axes[2,0].set_ylabel('Weight')
166+
axes[2,0].set_title('Scatter')
167+
# Box-and-Whisker
168+
axes[2,1].boxplot(y) # for Box-and-Whisker
169+
axes[2,1].set_title('Box-and-Whisker')
170+
plt.tight_layout() # prevent plot overlap
171+
fig.savefig('Six_Subplots') # save figure

0 commit comments

Comments
 (0)