In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from pathlib import Path
from statistics import mean
from matplotlib import style
import matplotlib.pyplot as plt
#from sklearn import linear_model
#from sklearn.datasets import make_regression
#from sklearn.preprocessing import StandardScaler
#from sklearn.linear_model import LinearRegression
#from sklearn.model_selection import train_test_split
# Imported from a previous work. Maybe use SciKit but not likely.

In [2]:
coffee_data = "Coffee_sales.csv"
coffee_df = pd.read_csv(coffee_data)
coffee_df.dtypes

hour_of_day      int64
cash_type       object
money          float64
coffee_name     object
Time_of_Day     object
Weekday         object
Month_name      object
Weekdaysort      int64
Monthsort        int64
Date            object
Time            object
dtype: object

In [3]:
coffee_df = coffee_df.drop(['Time_of_Day','hour_of_day'],axis=1)
coffee_df.head()

Unnamed: 0,cash_type,money,coffee_name,Weekday,Month_name,Weekdaysort,Monthsort,Date,Time
0,card,38.7,Latte,Fri,Mar,5,3,2024-03-01,10:15:50.520000
1,card,38.7,Hot Chocolate,Fri,Mar,5,3,2024-03-01,12:19:22.539000
2,card,38.7,Hot Chocolate,Fri,Mar,5,3,2024-03-01,12:20:18.089000
3,card,28.9,Americano,Fri,Mar,5,3,2024-03-01,13:46:33.006000
4,card,38.7,Latte,Fri,Mar,5,3,2024-03-01,13:48:14.626000


In [4]:
# Finding null values
for column in coffee_df.columns:
    print(f"Column {column} has {coffee_df[column].isnull().sum()} null values")

Column cash_type has 0 null values
Column money has 0 null values
Column coffee_name has 0 null values
Column Weekday has 0 null values
Column Month_name has 0 null values
Column Weekdaysort has 0 null values
Column Monthsort has 0 null values
Column Date has 0 null values
Column Time has 0 null values


In [5]:
# Renaming to more conventional nomenclature. (coffee_name??? some aren't even coffee)
coffee_df.rename(columns = {"cash_type": "Payment_Method",
                            "money": "Payment_Amount",
                           "coffee_name":"Product_Name",
                           "Month_name":"Month",
                           "Weekdaysort":"WeekNum",
                           "Monthsort":"MonthNum"},inplace = True)

In [6]:
coffee_df.head()

Unnamed: 0,Payment_Method,Payment_Amount,Product_Name,Weekday,Month,WeekNum,MonthNum,Date,Time
0,card,38.7,Latte,Fri,Mar,5,3,2024-03-01,10:15:50.520000
1,card,38.7,Hot Chocolate,Fri,Mar,5,3,2024-03-01,12:19:22.539000
2,card,38.7,Hot Chocolate,Fri,Mar,5,3,2024-03-01,12:20:18.089000
3,card,28.9,Americano,Fri,Mar,5,3,2024-03-01,13:46:33.006000
4,card,38.7,Latte,Fri,Mar,5,3,2024-03-01,13:48:14.626000


In [7]:
Product_Name_Unique = coffee_df['Product_Name'].unique()
Product_Name_Unique

array(['Latte', 'Hot Chocolate', 'Americano', 'Americano with Milk',
       'Cocoa', 'Cortado', 'Espresso', 'Cappuccino'], dtype=object)

In [8]:
# Filtering down to coffee or coffee adjacent items
coffee_sales_df = coffee_df.filter(['Product_Name','Payment_Amount'])
coffee_sales_df = coffee_sales_df.loc[(coffee_sales_df['Product_Name'] == 'Latte') |
                                        (coffee_sales_df['Product_Name'] == 'Americano') |
                                        (coffee_sales_df['Product_Name'] == 'Americano with Milk') |
                                        (coffee_sales_df['Product_Name'] == 'Cortado') |
                                        (coffee_sales_df['Product_Name'] == 'Espresso') |
                                        (coffee_sales_df['Product_Name'] == 'Cappuccino')].reset_index(drop=True)
coffee_sales_df

Unnamed: 0,Product_Name,Payment_Amount
0,Latte,38.70
1,Americano,28.90
2,Latte,38.70
3,Americano with Milk,33.80
4,Americano with Milk,33.80
...,...,...
3027,Americano with Milk,30.86
3028,Latte,35.76
3029,Cappuccino,35.76
3030,Americano,25.96


In [9]:
# Chocolate Items
coffee_sales_df = coffee_df.filter(['Product_Name','Payment_Amount'])
chocolate_sales_df = coffee_sales_df.loc[(coffee_sales_df['Product_Name'] == 'Hot Chocolate') | 
                                        (coffee_sales_df['Product_Name'] == 'Cocoa')].reset_index(drop=True)
chocolate_sales_df

Unnamed: 0,Product_Name,Payment_Amount
0,Hot Chocolate,38.70
1,Hot Chocolate,38.70
2,Hot Chocolate,38.70
3,Cocoa,38.70
4,Hot Chocolate,38.70
...,...,...
510,Hot Chocolate,35.76
511,Hot Chocolate,35.76
512,Cocoa,35.76
513,Cocoa,35.76


In [12]:
# Need to Set DataFrame to Action columns to lists for best fits
coffee_sales_list = coffee_sales_df['Payment_Amount'].tolist()

#Action_Tomatoes_list = Score_Genre_df_Action['Rotten_Tomatoes_Score'].tolist()
#Action_Metacritic_list = Score_Genre_df_Action['Metacritic_Score'].tolist()
#Action_Hidden_Gem_list = Score_Genre_df_Action['Hidden_Gem_Score'].tolist()

# Set said lists to arrays
C = np.array(coffee_sales_list, dtype=np.float64)
#T = np.array(Action_Tomatoes_list, dtype=np.float64)


In [13]:
# Graphing section
# Create linear regression object
Coffee_Regression = linear_model.LinearRegression()

# Train the model using the training sets
IMDb_Tomatoes_Action.fit(I.reshape(-1,1),T)

# Get the regression line using the trained model
regression_line = IMDb_Tomatoes_Action.predict(I.reshape(-1,1))

# Code for making predictions
#IMDb_score = 30
#Rotten_Tomatoes_Score = IMDb_Tomatoes_Action.predict(np.array([[IMDb_score]]))[0]

# Commented out code above and below is for making predictions
style.use('seaborn')
plt.scatter(I,T,label='Data Points', alpha=0.6,color='purple',s=75)
#plt.scatter(IMDb_score,Rotten_Tomatoes_Score , label='Rotten Tomatoes Prediction',color='blue',s=100)
plt.plot(I,regression_line,label='Best Fit Line', color='orange',linewidth=4)
plt.title('IMDb vs Rotten Tomatoes for Action Movies')
plt.xlabel('IMDb Score')
plt.ylabel('Rotten Tomatoes Score')
plt.legend()
plt.show()

OSError: 'seaborn' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)