TrainingByPackt
diff --git a/‎Chapter02/Activities/Activity_01.py
Lines changed: 14 additions & 34 deletions b/‎Chapter02/Activities/Activity_01.py
Lines changed: 14 additions & 34 deletions
diff --git a/‎Chapter02/Activities/Activity_02.py
Lines changed: 38 additions & 18 deletions b/‎Chapter02/Activities/Activity_02.py
Lines changed: 38 additions & 18 deletions
diff --git a/‎Chapter02/Activities/Activity_03.py
Lines changed: 167 additions & 16 deletions b/‎Chapter02/Activities/Activity_03.py
Lines changed: 167 additions & 16 deletions
@@ -1,37 +1,17 @@
-# Activity 1: Generating predictions and evaluating performance of multiple linear regression model
+# Activity 1: Line Plot
 
-# continuing from Exercise 4:
+# Create x
+x = ['January','February','March','April','May','June']
+print(x)
 
-# generate predictions on the test data
-predictions = model.predict(X_test)
+# Create y
+y = [1000, 1200, 1400, 1600, 1800, 2000]
+print(y)
 
-# plot correlation of predicted and actual values
-import matplotlib.pyplot as plt
-from scipy.stats import pearsonr
-plt.scatter(y_test, predictions)
-plt.xlabel('Y Test (True Values)')
-plt.ylabel('Predicted Values')
-plt.title('Predicted vs. Actual Values (r = {0:0.2f})'.format(pearsonr(y_test, predictions)[0], 2))
-plt.show()
-
-# plot distribution of residuals
-import seaborn as sns
-from scipy.stats import shapiro
-sns.distplot((y_test - predictions), bins = 50)
-plt.xlabel('Residuals')
-plt.ylabel('Density')
-plt.title('Histogram of Residuals (Shapiro W p-value = {0:0.3f})'.format(shapiro(y_test - predictions)[1]))
-plt.show()
-
-# compute metrics and put into a dataframe
-from sklearn import metrics
-import numpy as np
-metrics_df = pd.DataFrame({'Metric': ['MAE', 
-                                      'MSE', 
-                                      'RMSE', 
-                                      'R-Squared'],
-                          'Value': [metrics.mean_absolute_error(y_test, predictions),
-                                    metrics.mean_squared_error(y_test, predictions),
-                                    np.sqrt(metrics.mean_squared_error(y_test, predictions)),
-                                    metrics.explained_variance_score(y_test, predictions)]}).round(3)
-print(metrics_df)
+# Create the plot
+import matplotlib.pyplot as plt # import matplotlib
+plt.plot(x, y, '*:b') # plot items sold (y) by month (x)
+plt.xlabel('Month') # label x-axis
+plt.ylabel('Items Sold') # label y-axis
+plt.title('Items Sold has been Increasing Linearly') # add plot title
+plt.show() # print plot
@@ -1,23 +1,43 @@
-# Activity 2: Evaluating tuned model performance
+# Activity 2: Bar plot
 
-# continuing from Exercise 7:
+# Create a list for x
+x = ['Boston Celtics','Los Angeles Lakers', 'Chicago Bulls', 'Golden State Warriors', 'San Antonio Spurs']
+print(x)
 
-# generate predicted probabilities of yes
-predicted_prob = model.predict_proba(X_test)[:,1]
+# Create a list for y
+y = [17, 16, 6, 6, 5]
+print(y)
 
-# generate predicted classes
-predicted_class = model.predict(X_test)
+# Put into a data frame so we can sort them
+import pandas as pd
+df = pd.DataFrame({'Team': x,
+                   'Titles': y})
 
-# evaluate performance with confusion matrix
-from sklearn.metrics import confusion_matrix
-import numpy as np
-cm = pd.DataFrame(confusion_matrix(y_test, predicted_class))
-cm['Total'] = np.sum(cm, axis=1)
-cm = cm.append(np.sum(cm, axis=0), ignore_index=True)
-cm.columns = ['Predicted No', 'Predicted Yes', 'Total']
-cm = cm.set_index([['Actual No', 'Actual Yes', 'Total']])
-print(cm)
+# Sort df by titles
+df_sorted = df.sort_values(by=('Titles'), ascending=False)
 
-# generate a classification report
-from sklearn.metrics import classification_report
-print(classification_report(y_test, predicted_class))
+# Make a programmatic title (.iloc[0] = first row by position after sorting; [0] would look up index label 0)
+team_with_most_titles = df_sorted['Team'].iloc[0] # get team with most titles
+most_titles = df_sorted['Titles'].iloc[0] # get the number of max titles
+title = 'The {} have the most titles with {}'.format(team_with_most_titles, most_titles) # create title
+print(title)
+
+# Plot it
+import matplotlib.pyplot as plt # import matplotlib
+plt.bar(df_sorted['Team'], df_sorted['Titles'], color='red') # plot titles by team and make bars red
+plt.xlabel('Team') # create x label
+plt.ylabel('Number of Championships') # create y label
+plt.xticks(rotation=45) # rotate x tick labels 45 degrees
+plt.title(title) # title
+plt.savefig('Titles_by_Team') # save figure to present working directory
+plt.show() # print plot
+
+# Fix the cropping
+import matplotlib.pyplot as plt
+plt.bar(df_sorted['Team'], df_sorted['Titles'], color='red')
+plt.xlabel('Team')
+plt.ylabel('Number of Championships')
+plt.xticks(rotation=45)
+plt.title(title)
+plt.savefig('Titles_by_Team', bbox_inches='tight') # fix the cropping issue
+plt.show()
@@ -1,20 +1,171 @@
-# Activity 3: Generating predictions and evaluating performance of grid search SVC model
+# Activity 3: Multiple Plot Types using Subplots 
 
-# continuing from Exercise 9:
+# import Items_Sold_by_Week.csv
+import pandas as pd
+Items_by_Week = pd.read_csv('Items_Sold_by_Week.csv')
 
-# generate predicted classes
-predicted_class = model.predict(X_test_scaled)
+# For scatterplot
+# import Weight_by_Height.csv
+import pandas as pd
+Weight_by_Height = pd.read_csv('Weight_by_Height.csv')
 
-# evaluate performance with confusion matrix
-from sklearn.metrics import confusion_matrix
+# For histogram and Box-and-Whisker
+# Create an array of 100 normally distributed numbers
 import numpy as np
-cm = pd.DataFrame(confusion_matrix(y_test, predicted_class))
-cm['Total'] = np.sum(cm, axis=1)
-cm = cm.append(np.sum(cm, axis=0), ignore_index=True)
-cm.columns = ['Predicted No', 'Predicted Yes', 'Total']
-cm = cm.set_index([['Actual No', 'Actual Yes', 'Total']])
-print(cm)
-
-# generate a classification report
-from sklearn.metrics import classification_report
-print(classification_report(y_test, predicted_class))
+y = np.random.normal(loc=0, scale=0.1, size=100) # 100 numbers with mean of 0 and standard deviation of 0.1
+
+# generate figure with 6 subplots organized in 3 rows and 2 columns that do not overlap
+import matplotlib.pyplot as plt
+fig, axes = plt.subplots(nrows=3, ncols=2)
+plt.tight_layout() # prevent plot overlap
+
+# Name the titles
+import matplotlib.pyplot as plt
+fig, axes = plt.subplots(nrows=3, ncols=2)
+# line plot (top left)
+axes[0,0].set_title('Line')
+# Bar plot (top right)
+axes[0,1].set_title('Bar')
+# Horizontal bar plot (middle left)
+axes[1,0].set_title('Horizontal Bar')
+# Histogram (middle right)
+axes[1,1].set_title('Histogram')
+# Scatterplot (bottom left)
+axes[2,0].set_title('Scatter')
+# Box-and-Whisker
+axes[2,1].set_title('Box-and-Whisker')
+plt.tight_layout() # prevent plot overlap
+
+# In the 'Line', 'Bar', and 'Horizontal Bar' axes, plot 'Items_Sold' by 'Week'
+# from the 'Items_by_Week' DataFrame
+import matplotlib.pyplot as plt
+fig, axes = plt.subplots(nrows=3, ncols=2)
+# line plot (top left)
+axes[0,0].plot(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for line plot
+axes[0,0].set_title('Line')
+# Bar plot (top right)
+axes[0,1].bar(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for bar plot
+axes[0,1].set_title('Bar')
+# Horizontal bar plot (middle left)
+axes[1,0].barh(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for horizontal bar plot
+axes[1,0].set_title('Horizontal Bar')
+# Histogram (middle right)
+axes[1,1].set_title('Histogram')
+# Scatterplot (bottom left)
+axes[2,0].set_title('Scatter')
+# Box-and-Whisker
+axes[2,1].set_title('Box-and-Whisker')
+plt.tight_layout() # prevent plot overlap
+
+# In the 'Histogram' and 'Box-and-Whisker' axes, plot the array of normally
+# distributed numbers, y
+import matplotlib.pyplot as plt
+fig, axes = plt.subplots(nrows=3, ncols=2)
+# line plot (top left)
+axes[0,0].plot(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for line plot
+axes[0,0].set_title('Line')
+# Bar plot (top right)
+axes[0,1].bar(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for bar plot
+axes[0,1].set_title('Bar')
+# Horizontal bar plot (middle left)
+axes[1,0].barh(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for horizontal bar plot
+axes[1,0].set_title('Horizontal Bar')
+# Histogram (middle right)
+axes[1,1].hist(y, bins=20)
+axes[1,1].set_title('Histogram')
+# Scatterplot (bottom left)
+axes[2,0].set_title('Scatter')
+# Box-and-Whisker (bottom right)
+axes[2,1].boxplot(y) # for Box-and-Whisker
+axes[2,1].set_title('Box-and-Whisker')
+plt.tight_layout() # prevent plot overlap
+
+# add scatterplot
+import matplotlib.pyplot as plt
+fig, axes = plt.subplots(nrows=3, ncols=2)
+# line plot (top left)
+axes[0,0].plot(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for line plot
+axes[0,0].set_title('Line')
+# Bar plot (top right)
+axes[0,1].bar(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for bar plot
+axes[0,1].set_title('Bar')
+# Horizontal bar plot (middle left)
+axes[1,0].barh(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for horizontal bar plot
+axes[1,0].set_title('Horizontal Bar')
+# Histogram (middle right)
+axes[1,1].hist(y, bins=20) # for histogram
+axes[1,1].set_title('Histogram') 
+# Scatterplot (bottom left)
+axes[2,0].scatter(Weight_by_Height['Height'], Weight_by_Height['Weight']) # for scatterplot
+axes[2,0].set_title('Scatter')
+# Box-and-Whisker
+axes[2,1].boxplot(y) # for Box-and-Whisker
+axes[2,1].set_title('Box-and-Whisker')
+plt.tight_layout() # prevent plot overlap
+
+# Set x- and y-axis for each subplot
+import matplotlib.pyplot as plt
+fig, axes = plt.subplots(nrows=3, ncols=2)
+# line plot (top left)
+axes[0,0].plot(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for line plot
+axes[0,0].set_xlabel('Week')
+axes[0,0].set_ylabel('Items Sold')
+axes[0,0].set_title('Line')
+# Bar plot (top right)
+axes[0,1].bar(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for bar plot
+axes[0,1].set_xlabel('Week')
+axes[0,1].set_ylabel('Items Sold')
+axes[0,1].set_title('Bar')
+# Horizontal bar plot (middle left)
+axes[1,0].barh(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for horizontal bar plot
+axes[1,0].set_xlabel('Items Sold')
+axes[1,0].set_ylabel('Week')
+axes[1,0].set_title('Horizontal Bar')
+# Histogram (middle right)
+axes[1,1].hist(y, bins=20) # for histogram
+axes[1,1].set_xlabel('y')
+axes[1,1].set_ylabel('Frequency')
+axes[1,1].set_title('Histogram') 
+# Scatterplot (bottom left)
+axes[2,0].scatter(Weight_by_Height['Height'], Weight_by_Height['Weight']) # for scatterplot
+axes[2,0].set_xlabel('Height')
+axes[2,0].set_ylabel('Weight')
+axes[2,0].set_title('Scatter')
+# Box-and-Whisker
+axes[2,1].boxplot(y) # for Box-and-Whisker
+axes[2,1].set_title('Box-and-Whisker')
+plt.tight_layout() # prevent plot overlap
+
+# Enlarge the figure size and Save the figure
+import matplotlib.pyplot as plt
+fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(8,8)) # for figure size
+# line plot (top left)
+axes[0,0].plot(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for line plot
+axes[0,0].set_xlabel('Week')
+axes[0,0].set_ylabel('Items Sold')
+axes[0,0].set_title('Line')
+# Bar plot (top right)
+axes[0,1].bar(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for bar plot
+axes[0,1].set_xlabel('Week')
+axes[0,1].set_ylabel('Items Sold')
+axes[0,1].set_title('Bar')
+# Horizontal bar plot (middle left)
+axes[1,0].barh(Items_by_Week['Week'], Items_by_Week['Items_Sold']) # for horizontal bar plot
+axes[1,0].set_xlabel('Items Sold')
+axes[1,0].set_ylabel('Week')
+axes[1,0].set_title('Horizontal Bar')
+# Histogram (middle right)
+axes[1,1].hist(y, bins=20) # for histogram
+axes[1,1].set_xlabel('y')
+axes[1,1].set_ylabel('Frequency')
+axes[1,1].set_title('Histogram') 
+# Scatterplot (bottom left)
+axes[2,0].scatter(Weight_by_Height['Height'], Weight_by_Height['Weight']) # for scatterplot
+axes[2,0].set_xlabel('Height')
+axes[2,0].set_ylabel('Weight')
+axes[2,0].set_title('Scatter')
+# Box-and-Whisker
+axes[2,1].boxplot(y) # for Box-and-Whisker
+axes[2,1].set_title('Box-and-Whisker')
+plt.tight_layout() # prevent plot overlap
+fig.savefig('Six_Subplots') # save figure