## Intro to Dataframes

In [None]:
import pandas as pd
import numpy as np

In [None]:
df = pd.DataFrame([[1,2,3],[4,5,6],[7,8,9],[10,11,12]], columns=["A", "B", "C"], index=['x','y','z','v'])

In [None]:
# Display the first 5 rows of the DataFrame without printing the entire dataset.
df.head()

Unnamed: 0,A,B,C
x,1,2,3
y,4,5,6
z,7,8,9
v,10,11,12


In [None]:
df.head(1)

Unnamed: 0,A,B,C
x,1,2,3


In [None]:
df.tail(2)

Unnamed: 0,A,B,C
z,7,8,9
v,10,11,12


In [None]:
df.columns

Index(['A', 'B', 'C'], dtype='object')

In [None]:
df.index.tolist()

['x', 'y', 'z', 'v']

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, x to v
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       4 non-null      int64
 1   B       4 non-null      int64
 2   C       4 non-null      int64
dtypes: int64(3)
memory usage: 128.0+ bytes


In [None]:
df.describe()

Unnamed: 0,A,B,C
count,4.0,4.0,4.0
mean,5.5,6.5,7.5
std,3.872983,3.872983,3.872983
min,1.0,2.0,3.0
25%,3.25,4.25,5.25
50%,5.5,6.5,7.5
75%,7.75,8.75,9.75
max,10.0,11.0,12.0


In [None]:
df.nunique()

Unnamed: 0,0
A,4
B,4
C,4


In [None]:
df['A'].unique()

array([ 1,  4,  7, 10])

In [None]:
df.shape

(4, 3)

In [None]:
df.size

12

In [None]:
df

Unnamed: 0,A,B,C
x,1,2,3
y,4,5,6
z,7,8,9
v,10,11,12


## Loading in Dataframes from Files

In [None]:
# Read a CSV file into a Dataframe
coffee = pd.read_csv('/content/coffee.csv')

In [None]:
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35


In [None]:
bios = pd.read_csv('/content/bios.csv')

In [None]:
bios.head()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25


In [None]:
# Read a Parquet file into a DataFrame
results = pd.read_parquet('/content/results.parquet')

In [None]:
results.head()

Unnamed: 0,year,type,discipline,event,as,athlete_id,noc,team,place,tied,medal
0,1912.0,Summer,Tennis,"Singles, Men (Olympic)",Jean-François Blanchy,1,FRA,,17.0,True,
1,1912.0,Summer,Tennis,"Doubles, Men (Olympic)",Jean-François Blanchy,1,FRA,Jean Montariol,,False,
2,1920.0,Summer,Tennis,"Singles, Men (Olympic)",Jean-François Blanchy,1,FRA,,32.0,True,
3,1920.0,Summer,Tennis,"Doubles, Mixed (Olympic)",Jean-François Blanchy,1,FRA,Jeanne Vaussard,8.0,True,
4,1920.0,Summer,Tennis,"Doubles, Men (Olympic)",Jean-François Blanchy,1,FRA,Jacques Brugnon,4.0,False,


In [None]:
# Read one sheet of an Excel workbook into a DataFrame (sheet_name selects the sheet)
olympics_data = pd.read_excel('/content/olympics-data.xlsx', sheet_name="results")

In [None]:
olympics_data.head()

Unnamed: 0,year,type,discipline,event,as,athlete_id,noc,team,place,tied,medal
0,1912.0,Summer,Tennis,"Singles, Men (Olympic)",Jean-François Blanchy,1,FRA,,17.0,True,
1,1912.0,Summer,Tennis,"Doubles, Men (Olympic)",Jean-François Blanchy,1,FRA,Jean Montariol,,False,
2,1920.0,Summer,Tennis,"Singles, Men (Olympic)",Jean-François Blanchy,1,FRA,,32.0,True,
3,1920.0,Summer,Tennis,"Doubles, Mixed (Olympic)",Jean-François Blanchy,1,FRA,Jeanne Vaussard,8.0,True,
4,1920.0,Summer,Tennis,"Doubles, Men (Olympic)",Jean-François Blanchy,1,FRA,Jacques Brugnon,4.0,False,


## Accessing Data with Pandas

In [None]:
# Shows everything in the CSV file
print(coffee)

          Day Coffee Type  Units Sold
0      Monday    Espresso          25
1      Monday       Latte          15
2     Tuesday    Espresso          30
3     Tuesday       Latte          20
4   Wednesday    Espresso          35
5   Wednesday       Latte          25
6    Thursday    Espresso          40
7    Thursday       Latte          30
8      Friday    Espresso          45
9      Friday       Latte          35
10   Saturday    Espresso          45
11   Saturday       Latte          35
12     Sunday    Espresso          45
13     Sunday       Latte          35


In [None]:
# Formats the output to be more visually appealing
display(coffee)

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35
5,Wednesday,Latte,25
6,Thursday,Espresso,40
7,Thursday,Latte,30
8,Friday,Espresso,45
9,Friday,Latte,35


In [None]:
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35


In [None]:
coffee.tail(10)

Unnamed: 0,Day,Coffee Type,Units Sold
4,Wednesday,Espresso,35
5,Wednesday,Latte,25
6,Thursday,Espresso,40
7,Thursday,Latte,30
8,Friday,Espresso,45
9,Friday,Latte,35
10,Saturday,Espresso,45
11,Saturday,Latte,35
12,Sunday,Espresso,45
13,Sunday,Latte,35


In [None]:
# Access random data
coffee.sample(5)

Unnamed: 0,Day,Coffee Type,Units Sold
3,Tuesday,Latte,20
9,Friday,Latte,35
10,Saturday,Espresso,45
4,Wednesday,Espresso,35
5,Wednesday,Latte,25


In [None]:
# loc -- label-based selection
# coffee.loc[Rows, Columns]

# Retrieves only the row whose index label is 0 (the first row here)
coffee.loc[0]

Unnamed: 0,0
Day,Monday
Coffee Type,Espresso
Units Sold,25


In [None]:
coffee.loc[[0,1,5]]

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
5,Wednesday,Latte,25


In [None]:
# From row 5 to 9 with only two columns
coffee.loc[5:9, ["Day", "Units Sold"]]

Unnamed: 0,Day,Units Sold
5,Wednesday,25
6,Thursday,40
7,Thursday,30
8,Friday,45
9,Friday,35


#### iloc

In [None]:
# iloc selects by integer position only (never by label)
coffee.iloc[:, [0,2]] # All rows, first and third columns

Unnamed: 0,Day,Units Sold
0,Monday,25
1,Monday,15
2,Tuesday,30
3,Tuesday,20
4,Wednesday,35
5,Wednesday,25
6,Thursday,40
7,Thursday,30
8,Friday,45
9,Friday,35


#### Other Stuff

In [None]:
# Use the values of the "Day" column as the row index (the "Day" column itself is kept)
coffee.index = coffee["Day"]

In [None]:
coffee.loc["Monday":"Wednesday"] # label slice on the index: Monday through Wednesday, both ends included

Unnamed: 0_level_0,Day,Coffee Type,Units Sold
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Monday,Monday,Espresso,25
Monday,Monday,Latte,15
Tuesday,Tuesday,Espresso,30
Tuesday,Tuesday,Latte,20
Wednesday,Wednesday,Espresso,35
Wednesday,Wednesday,Latte,25


#### Setting Values

In [None]:
# .loc slices by label, so restore the default 0..n-1 integer index first:
# an earlier cell replaced the index with the "Day" strings, and the integer
# slice 1:3 is not a valid label slice on that index.
coffee = coffee.reset_index(drop=True)
coffee.loc[1:3, "Units Sold"] = 10 # Set units sold to 10 for rows 1 through 3 (the end label is included)

In [None]:
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,10
2,Tuesday,Espresso,10
3,Tuesday,Latte,10
4,Wednesday,Espresso,35


#### Optimized way to get single values (.at & .iat)

In [None]:
coffee.at[0,"Units Sold"]

25

In [None]:
# .iat takes integer positions only: (row position, column position)
coffee.iat[3,1]

'Latte'

#### Getting Columns

In [None]:
coffee["Day"]

Unnamed: 0,Day
0,Monday
1,Monday
2,Tuesday
3,Tuesday
4,Wednesday
5,Wednesday
6,Thursday
7,Thursday
8,Friday
9,Friday


In [None]:
coffee[["Day", "Units Sold"]]

Unnamed: 0,Day,Units Sold
0,Monday,25
1,Monday,10
2,Tuesday,10
3,Tuesday,10
4,Wednesday,35
5,Wednesday,25
6,Thursday,40
7,Thursday,30
8,Friday,45
9,Friday,35


#### Sort Values

In [None]:
coffee.sort_values(["Units Sold"], ascending=False) # sort based on units sold from bigger to smaller number

Unnamed: 0,Day,Coffee Type,Units Sold
8,Friday,Espresso,45
10,Saturday,Espresso,45
12,Sunday,Espresso,45
6,Thursday,Espresso,40
4,Wednesday,Espresso,35
9,Friday,Latte,35
11,Saturday,Latte,35
13,Sunday,Latte,35
7,Thursday,Latte,30
0,Monday,Espresso,25


In [None]:
# Sort by units sold from largest to smallest, then break ties by coffee type from A to Z.
# `ascending` takes one boolean per sort key, in the same order as the keys.
coffee.sort_values(["Units Sold", "Coffee Type"], ascending=[False, True])

Unnamed: 0,Day,Coffee Type,Units Sold
8,Friday,Espresso,45
10,Saturday,Espresso,45
12,Sunday,Espresso,45
6,Thursday,Espresso,40
4,Wednesday,Espresso,35
9,Friday,Latte,35
11,Saturday,Latte,35
13,Sunday,Latte,35
7,Thursday,Latte,30
0,Monday,Espresso,25


## Filtering Data

In [None]:
bios.head()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25


In [None]:
# First way using 'loc'
bios.loc[bios["height_cm"] > 215]

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
5089,5108,Viktor Pankrashkin,1957-06-19,Moskva (Moscow),Moskva,RUS,Soviet Union,220.0,112.0,1993-07-24
5583,5606,Paulinho Villas Boas,1963-01-26,São Paulo,São Paulo,BRA,Brazil,217.0,106.0,
5673,5696,Gunther Behnke,1963-01-19,Leverkusen,Nordrhein-Westfalen,GER,Germany,221.0,114.0,
5716,5739,Uwe Blab,1962-03-26,München (Munich),Bayern,GER,Germany West Germany,218.0,110.0,
5781,5804,Tommy Burleson,1952-02-24,Crossnore,North Carolina,USA,United States,223.0,102.0,
5796,5819,Andy Campbell,1956-07-21,Melbourne,Victoria,AUS,Australia,218.0,93.0,
6223,6250,Lars Hansen,1954-09-27,København (Copenhagen),Hovedstaden,DEN,Canada,216.0,105.0,
6270,6298,Hu Zhangbao,1963-04-05,,,,People's Republic of China,216.0,135.0,
6409,6440,Sergey Kovalenko,1947-08-11,,,,Soviet Union,216.0,111.0,2004-11-18
6420,6451,Jānis Krūmiņš,1930-01-30,Cēsis,Cēsu novads,LAT,Soviet Union,218.0,141.0,1994-11-20


In [None]:
bios.loc[bios["height_cm"] > 215, ["name", "height_cm"]] # But get only name and height for them

Unnamed: 0,name,height_cm
5089,Viktor Pankrashkin,220.0
5583,Paulinho Villas Boas,217.0
5673,Gunther Behnke,221.0
5716,Uwe Blab,218.0
5781,Tommy Burleson,223.0
5796,Andy Campbell,218.0
6223,Lars Hansen,216.0
6270,Hu Zhangbao,216.0
6409,Sergey Kovalenko,216.0
6420,Jānis Krūmiņš,218.0


#### Short-hand syntax (without .loc)

In [None]:
# Second way
bios[bios['height_cm'] > 215][["name","height_cm"]]

Unnamed: 0,name,height_cm
5089,Viktor Pankrashkin,220.0
5583,Paulinho Villas Boas,217.0
5673,Gunther Behnke,221.0
5716,Uwe Blab,218.0
5781,Tommy Burleson,223.0
5796,Andy Campbell,218.0
6223,Lars Hansen,216.0
6270,Hu Zhangbao,216.0
6409,Sergey Kovalenko,216.0
6420,Jānis Krūmiņš,218.0


#### Multiple filter conditions

In [None]:
bios[(bios['height_cm'] > 215) & (bios['born_country']=='USA')] # AND condition

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
5781,5804,Tommy Burleson,1952-02-24,Crossnore,North Carolina,USA,United States,223.0,102.0,
6722,6755,Shaquille O'Neal,1972-03-06,Newark,New Jersey,USA,United States,216.0,137.0,
6937,6972,David Robinson,1965-08-06,Key West,Florida,USA,United States,216.0,107.0,
123850,126093,Tyson Chandler,1982-10-02,Hanford,California,USA,United States,216.0,107.0,


#### Filter by string conditions

In [None]:
bios[bios['name'].str.contains("keith", case=False)] # Case insensitive capital or small

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
1897,1907,Keith Hanlon,1966-09-01,,,,Ireland,,,
3505,3517,Keith Wallace,1961-03-29,Preston,England,GBR,Great Britain,165.0,51.0,1999-12-31
6228,6255,Keith Hartley,1940-10-15,Vancouver,British Columbia,CAN,Canada,200.0,85.0,
8898,8946,Keith Mwila,1966-01-01,,,,Zambia,,,1993-01-09
12053,12118,Keith Hervey,1898-11-03,Fulham,England,GBR,Great Britain,,,1973-02-22
...,...,...,...,...,...,...,...,...,...,...
109900,111105,Keith Cumberpatch,1927-08-25,Christchurch,Canterbury,NZL,New Zealand,,,2013-11-15
115973,117348,Keith Sanderson,1975-02-02,Plymouth,Massachusetts,USA,United States,183.0,95.0,
117676,119195,Duncan Keith,1983-07-16,Winnipeg,Manitoba,CAN,Canada,185.0,88.0,
122121,124176,Keith Ferguson,1979-09-07,Sale,Victoria,AUS,Australia,176.0,78.0,


In [None]:
# Regex syntax
bios[bios['name'].str.contains('keith|patrick', case=False)] # OR condition

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
6,7,Patrick Chila,1969-11-27,Ris-Orangis,Essonne,FRA,France,180.0,73.0,
119,120,Patrick Wheatley,1899-01-20,Vryheid,KwaZulu-Natal,RSA,Great Britain,,,1967-11-05
319,320,Patrick De Koning,1961-04-23,Dendermonde,Oost-Vlaanderen,BEL,Belgium,178.0,92.0,
1897,1907,Keith Hanlon,1966-09-01,,,,Ireland,,,
2115,2125,Patrick Jopp,1962-01-08,,,,Switzerland,176.0,67.0,
...,...,...,...,...,...,...,...,...,...,...
143975,147633,Patrick Chinyemba,2001-01-03,,,,Zambia,,,
144172,147850,Patrick Jakob,1996-10-17,Sankt Johann in Tirol,Tirol,AUT,Austria,,,
144547,148239,Patrick Galbraith,1986-03-11,Haderslev,Syddanmark,DEN,Denmark,,,
144565,148257,Patrick Russell,1993-01-04,Gentofte,Hovedstaden,DEN,Denmark,186.0,93.0,


In [None]:
# Other cool regex filters
# na=False makes a missing value count as "no match", so the result is a clean
# boolean mask that can be used for row selection.

# Find athletes born in cities that start with a vowel:
vowel_cities = bios[bios['born_city'].str.contains(r'^[AEIOUaeiou]', na=False)]

# Find athletes with names that contain exactly two vowels:
two_vowels = bios[bios['name'].str.contains(r'^[^AEIOUaeiou]*[AEIOUaeiou][^AEIOUaeiou]*[AEIOUaeiou][^AEIOUaeiou]*$', na=False)]

# Find athletes with names that have repeated consecutive letters (e.g., "Aaron", "Emmett"):
# - case=False so that "Aa" counts as a repeat (a case-sensitive match would miss "Aaron");
# - [^\W\d_] matches a single letter, so doubled spaces or punctuation are not counted.
# pandas emits a UserWarning for str.contains patterns with a capture group; the group
# is required here for the \1 back-reference, so that warning is expected.
repeated_letters = bios[bios['name'].str.contains(r'([^\W\d_])\1', case=False, na=False)]

# Find athletes with names ending in 'son' or 'sen':
son_sen_names = bios[bios['name'].str.contains(r'son$|sen$', case=False, na=False)]

# Find athletes born in a year starting with '19':
born_19xx = bios[bios['born_date'].str.contains(r'^19', na=False)]

# Find athletes with names that do not contain any vowels:
no_vowels = bios[bios['name'].str.contains(r'^[^AEIOUaeiou]*$', na=False)]

# Find athletes whose names contain a hyphen or an apostrophe:
hyphen_apostrophe = bios[bios['name'].str.contains(r"[-']", na=False)]

# Find athletes with names that start and end with the same letter:
# (the capture group is needed for the \1 back-reference, so the pandas
# match-group UserWarning is expected for this pattern as well)
start_end_same = bios[bios['name'].str.contains(r'^(.).*\1$', na=False, case=False)]

# Find athletes with a born_city that has exactly 7 characters:
city_seven_chars = bios[bios['born_city'].str.contains(r'^.{7}$', na=False)]

# Find athletes with names containing three or more vowels:
# A non-capturing group (?:...) avoids the match-group warning; finding three
# vowel-led chunks is enough to know the name has at least three vowels.
three_or_more_vowels = bios[bios['name'].str.contains(r'(?:[AEIOUaeiou].*){3}', na=False)]


  repeated_letters = bios[bios['name'].str.contains(r'(.)\1', na=False)]
  start_end_same = bios[bios['name'].str.contains(r'^(.).*\1$', na=False, case=False)]
  three_or_more_vowels = bios[bios['name'].str.contains(r'([AEIOUaeiou].*){3,}', na=False)]


In [None]:
# regex=False: the pattern is treated as a literal substring, not as a regular expression
bios[bios['name'].str.contains('keith|patrick', case=False, regex=False)] # Returns no rows: no name contains the literal text 'keith|patrick'

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date


In [None]:
# isin method & startswith
# isin - where column's value is in a given list of values (more than one)
bios[bios['born_country'].isin(["USA", "FRA", "GBR"]) & (bios['name'].str.startswith("Keith"))]

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
3505,3517,Keith Wallace,1961-03-29,Preston,England,GBR,Great Britain,165.0,51.0,1999-12-31
12053,12118,Keith Hervey,1898-11-03,Fulham,England,GBR,Great Britain,,,1973-02-22
14577,14674,Keith Harrison,1933-03-28,Birmingham,England,GBR,Great Britain,,,
16166,16281,Keith Reynolds,1963-12-25,Solihull,England,GBR,Great Britain,173.0,68.0,
18734,18862,Keith Sinclair,1945-06-26,Sunderland,England,GBR,Great Britain,190.0,79.0,
29897,30123,Keith Langley,1961-06-03,Aldershot,England,GBR,Great Britain,173.0,70.0,
34011,34275,Keith Remfry,1947-11-17,Ealing,England,GBR,Great Britain,193.0,114.0,2015-09-16
46885,47234,Keith Collin,1937-01-18,Marylebone,England,GBR,Great Britain,168.0,63.0,1991-03-06
50929,51288,Keith Carter,1924-08-30,Akron,Ohio,USA,United States,,,2013-05-03
51185,51544,Keith Russell,1948-01-15,Mesa,Arizona,USA,United States,188.0,73.0,


## Adding / Removing Columns

In [None]:
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25.0
1,Monday,Latte,15.0
2,Tuesday,Espresso,
3,Tuesday,Latte,
4,Wednesday,Espresso,35.0


In [None]:
coffee['price'] = 4.99 # Add a new column with the same value

In [None]:
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold,price
0,Monday,Espresso,25.0,4.99
1,Monday,Latte,15.0,4.99
2,Tuesday,Espresso,,4.99
3,Tuesday,Latte,,4.99
4,Wednesday,Espresso,35.0,4.99


In [None]:
# Add a new column whose value depends on another column
# (numpy is already imported as np in the first cell of the notebook):
# if the coffee type is Espresso, new_price is 3.99; otherwise it is 5.99
coffee['new_price'] = np.where(coffee['Coffee Type']=='Espresso', 3.99, 5.99)

In [None]:
coffee

Unnamed: 0,Day,Coffee Type,Units Sold,price,new_price
0,Monday,Espresso,25.0,4.99,3.99
1,Monday,Latte,15.0,4.99,5.99
2,Tuesday,Espresso,,4.99,3.99
3,Tuesday,Latte,,4.99,5.99
4,Wednesday,Espresso,35.0,4.99,3.99
5,Wednesday,Latte,25.0,4.99,5.99
6,Thursday,Espresso,40.0,4.99,3.99
7,Thursday,Latte,30.0,4.99,5.99
8,Friday,Espresso,45.0,4.99,3.99
9,Friday,Latte,35.0,4.99,5.99


In [None]:
# Drop a column (inplace=True modifies `coffee` itself)
coffee.drop(columns=['price'], inplace=True)

# The line below would also have worked: drop() returns a new DataFrame without
# the column, and assigning it back to `coffee` keeps the change.
# coffee = coffee.drop(columns=['price'])

In [None]:
coffee

Unnamed: 0,Day,Coffee Type,Units Sold,new_price
0,Monday,Espresso,25.0,3.99
1,Monday,Latte,15.0,5.99
2,Tuesday,Espresso,,3.99
3,Tuesday,Latte,,5.99
4,Wednesday,Espresso,35.0,3.99
5,Wednesday,Latte,25.0,5.99
6,Thursday,Espresso,40.0,3.99
7,Thursday,Latte,30.0,5.99
8,Friday,Espresso,45.0,3.99
9,Friday,Latte,35.0,5.99


In [None]:
coffee = coffee[['Day', 'Coffee Type', 'Units Sold', 'new_price']] # Keep only the listed columns -- another way to drop a column such as 'price' (it was already dropped above, so nothing changes here)

In [None]:
coffee

Unnamed: 0,Day,Coffee Type,Units Sold,new_price
0,Monday,Espresso,25.0,3.99
1,Monday,Latte,15.0,5.99
2,Tuesday,Espresso,,3.99
3,Tuesday,Latte,,5.99
4,Wednesday,Espresso,35.0,3.99
5,Wednesday,Latte,25.0,5.99
6,Thursday,Espresso,40.0,3.99
7,Thursday,Latte,30.0,5.99
8,Friday,Espresso,45.0,3.99
9,Friday,Latte,35.0,5.99


In [None]:
# Add new column
coffee['revenue'] = coffee['Units Sold'] * coffee['new_price']

In [None]:
coffee

Unnamed: 0,Day,Coffee Type,Units Sold,new_price,revenue
0,Monday,Espresso,25.0,3.99,99.75
1,Monday,Latte,15.0,5.99,89.85
2,Tuesday,Espresso,,3.99,
3,Tuesday,Latte,,5.99,
4,Wednesday,Espresso,35.0,3.99,139.65
5,Wednesday,Latte,25.0,5.99,149.75
6,Thursday,Espresso,40.0,3.99,159.6
7,Thursday,Latte,30.0,5.99,179.7
8,Friday,Espresso,45.0,3.99,179.55
9,Friday,Latte,35.0,5.99,209.65


In [None]:
coffee.rename(columns={'new_price': 'price'}, inplace=True) # Rename a column
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold,price,revenue
0,Monday,Espresso,25.0,3.99,99.75
1,Monday,Latte,15.0,5.99,89.85
2,Tuesday,Espresso,,3.99,
3,Tuesday,Latte,,5.99,
4,Wednesday,Espresso,35.0,3.99,139.65


In [None]:
# rename() returns a renamed copy by default and leaves the original DataFrame unchanged.
# To keep the new column name, either pass inplace=True (as the cell above does) or assign the result back: coffee = coffee.rename(...).

In [None]:
coffee

Unnamed: 0,Day,Coffee Type,Units Sold,price,revenue
0,Monday,Espresso,25.0,3.99,99.75
1,Monday,Latte,15.0,5.99,89.85
2,Tuesday,Espresso,,3.99,
3,Tuesday,Latte,,5.99,
4,Wednesday,Espresso,35.0,3.99,139.65
5,Wednesday,Latte,25.0,5.99,149.75
6,Thursday,Espresso,40.0,3.99,159.6
7,Thursday,Latte,30.0,5.99,179.7
8,Friday,Espresso,45.0,3.99,179.55
9,Friday,Latte,35.0,5.99,209.65


In [None]:
bios_new = bios.copy() # Create a copy of the bios dataframe

In [None]:
bios_new['first_name'] = bios_new['name'].str.split(' ').str[0] # Create a new column with the first name of each athlete

In [None]:
# query() filters rows using an expression string of the form 'column == "value"'
bios_new.query('first_name == "Keith"') # Find athletes with the first name "Keith"

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date,first_name
1897,1907,Keith Hanlon,1966-09-01,,,,Ireland,,,,Keith
3505,3517,Keith Wallace,1961-03-29,Preston,England,GBR,Great Britain,165.0,51.0,1999-12-31,Keith
6228,6255,Keith Hartley,1940-10-15,Vancouver,British Columbia,CAN,Canada,200.0,85.0,,Keith
8898,8946,Keith Mwila,1966-01-01,,,,Zambia,,,1993-01-09,Keith
12053,12118,Keith Hervey,1898-11-03,Fulham,England,GBR,Great Britain,,,1973-02-22,Keith
...,...,...,...,...,...,...,...,...,...,...,...
99921,100722,Keith Carney,1970-02-03,Providence,Rhode Island,USA,United States,188.0,93.0,,Keith
102227,103168,Keith Beavers,1983-02-09,London,Ontario,CAN,Canada,185.0,75.0,,Keith
109900,111105,Keith Cumberpatch,1927-08-25,Christchurch,Canterbury,NZL,New Zealand,,,2013-11-15,Keith
115973,117348,Keith Sanderson,1975-02-02,Plymouth,Massachusetts,USA,United States,183.0,95.0,,Keith


In [None]:
bios_new['born_datetime'] = pd.to_datetime(bios_new['born_date']) # Convert the 'born_date' column to datetime format in a new column called 'born_datetime'

In [None]:
bios_new.head()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date,first_name,born_datetime
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02,Jean-François,1886-12-12
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,,Arnaud,1969-04-01
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17,Jean,1898-08-13
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20,Jacques,1895-05-11
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25,Albert,1878-04-17


In [None]:
bios_new['born_year'] = bios_new['born_datetime'].dt.year # Extract the year from the 'born_datetime' column and store it in a new column called 'born_year'

In [None]:
bios_new.head()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date,first_name,born_datetime,born_year
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02,Jean-François,1886-12-12,1886.0
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,,Arnaud,1969-04-01,1969.0
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17,Jean,1898-08-13,1898.0
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20,Jacques,1895-05-11,1895.0
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25,Albert,1878-04-17,1878.0


In [None]:
bios_new[['name','born_year']]

Unnamed: 0,name,born_year
0,Jean-François Blanchy,1886.0
1,Arnaud Boetsch,1969.0
2,Jean Borotra,1898.0
3,Jacques Brugnon,1895.0
4,Albert Canet,1878.0
...,...,...
145495,Polina Luchnikova,2002.0
145496,Valeriya Merkusheva,1999.0
145497,Yuliya Smirnova,1998.0
145498,André Foussard,1899.0


In [None]:
# Save the modified DataFrame (including the new columns) to a new CSV file;
# index=False leaves the row index out of the file
bios_new.to_csv('/content/bios_new.csv', index=False)

In [None]:
# Using apply()
# Create a new column called 'height_category' that categorizes athletes based on their height:
#   below 165 cm -> 'Short', below 185 cm -> 'Average', otherwise 'Tall'.
# Missing heights stay missing: every comparison with NaN is False, so without the
# explicit pd.isna check they would fall through to the final 'Tall' branch.
bios['height_category'] = bios['height_cm'].apply(
    lambda x: np.nan if pd.isna(x) else ('Short' if x < 165 else ('Average' if x < 185 else 'Tall'))
)

In [None]:
bios.head()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date,height_category
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02,Tall
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,,Average
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17,Average
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20,Average
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25,Tall


In [None]:
def categorize_athlete(row):
    """Assign a size category to one athlete from height and weight.

    Parameters
    ----------
    row : mapping-like (for example a DataFrame row)
        Must provide the keys 'height_cm' and 'weight_kg'.

    Returns
    -------
    str or float
        'Lightweight'  - under 175 cm and under 70 kg;
        'Middleweight' - otherwise, under 185 cm or at most 80 kg;
        'Heavyweight'  - everything else (at least 185 cm and over 80 kg);
        NaN            - when height or weight is missing.
    """
    height, weight = row['height_cm'], row['weight_kg']

    # Comparisons against NaN are always False, so without this guard an
    # athlete with no measurements would fall through to 'Heavyweight'.
    if pd.isna(height) or pd.isna(weight):
        return np.nan

    if height < 175 and weight < 70:
        return 'Lightweight'
    if height < 185 or weight <= 80:
        return 'Middleweight'
    return 'Heavyweight'

# Create a new column called 'Category' by calling categorize_athlete once per row
# (axis='columns' hands each row, not each column, to the function).
bios['Category'] = bios.apply(categorize_athlete, axis='columns')

In [None]:
bios.head()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date,height_category,Category
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02,Tall,Heavyweight
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,,Average,Middleweight
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17,Average,Middleweight
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20,Average,Lightweight
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25,Tall,Heavyweight


## Merging & Concatenating Data

In [None]:
# Read the NOC regions CSV file into a DataFrame.
nocs = pd.read_csv('/content/noc_regions.csv')

In [None]:
# merge() - left join
# Keep every row of bios and attach the matching nocs row, pairing
# bios' 'born_country' with nocs' 'NOC'. Column names present in both
# frames get the default _x / _y suffixes.
bios_new = bios.merge(
    nocs,
    how='left',
    left_on='born_country',
    right_on='NOC',
)

In [None]:
bios_new.head()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC_x,height_cm,weight_kg,died_date,NOC_y,region,notes
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02,FRA,France,
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,,FRA,France,
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17,FRA,France,
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20,FRA,France,
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25,GBR,UK,


In [None]:
# Rename the 'region' column to 'born_country_full'.
# Reassign the result instead of using inplace=True, so the cell reads as an
# explicit "new frame from old frame" step and can be chained with other methods.
bios_new = bios_new.rename(columns={'region': 'born_country_full'})

In [None]:
bios_new.head()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC_x,height_cm,weight_kg,died_date,NOC_y,born_country_full,notes
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02,FRA,France,
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,,FRA,France,
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17,FRA,France,
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20,FRA,France,
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25,GBR,UK,


In [None]:
# Independent copies of the bios rows whose 'born_country' is 'USA' / 'GBR'.
usa = bios.loc[bios['born_country'].eq('USA')].copy()
gbr = bios.loc[bios['born_country'].eq('GBR')].copy()

In [None]:
# concat()
# Stack the two DataFrames vertically: all 'usa' rows first, then all 'gbr' rows.
# The original index labels are kept.
new_df = pd.concat([usa, gbr], axis=0)

In [None]:
new_df.head() # All USA

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
54,55,Monique Javer,1967-07-22,Burlingame,California,USA,Great Britain,177.0,64.0,
960,964,Xóchitl Escobedo,1968-09-17,West Covina,California,USA,Mexico,170.0,60.0,
961,965,Angélica Gavaldón,1973-10-03,El Centro,California,USA,Mexico,160.0,54.0,
1231,1238,Bert Schneider,1897-07-01,Cleveland,Ohio,USA,Canada,,,1986-02-20
1345,1352,Laura Berg,1975-01-06,Santa Fe Springs,California,USA,United States,168.0,61.0,


In [None]:
new_df.tail() # All GBR

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
144811,148512,Benjamin Alexander,1983-05-08,London,England,GBR,Jamaica,,,
144815,148517,Ashley Watson,1993-10-28,Peterborough,England,GBR,Jamaica,,,
145005,148716,Peder Kongshaug,2001-08-13,Wimbledon,England,GBR,Norway,184.0,86.0,
145319,149041,Axel Brown,1992-04-02,Harrogate,England,GBR,Trinidad and Tobago,,,
145388,149111,Jean-Luc Baker,1993-10-07,Burnley,England,GBR,United States,,,


In [None]:
# merge(): `on` names the key column shared by both DataFrames;
# `how` picks the join type ('inner', 'left', 'right', ...).
# Left join: keep every 'results' row and attach the matching 'bios' row via 'athlete_id'.
combined_df = results.merge(bios, how='left', on='athlete_id')

In [None]:
combined_df.head()

Unnamed: 0,year,type,discipline,event,as,athlete_id,noc,team,place,tied,medal,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
0,1912.0,Summer,Tennis,"Singles, Men (Olympic)",Jean-François Blanchy,1,FRA,,17.0,True,,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
1,1912.0,Summer,Tennis,"Doubles, Men (Olympic)",Jean-François Blanchy,1,FRA,Jean Montariol,,False,,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
2,1920.0,Summer,Tennis,"Singles, Men (Olympic)",Jean-François Blanchy,1,FRA,,32.0,True,,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
3,1920.0,Summer,Tennis,"Doubles, Mixed (Olympic)",Jean-François Blanchy,1,FRA,Jeanne Vaussard,8.0,True,,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
4,1920.0,Summer,Tennis,"Doubles, Men (Olympic)",Jean-François Blanchy,1,FRA,Jacques Brugnon,4.0,False,,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02


## Handling Null Values

In [None]:
# Set 'Units Sold' to NaN (missing value) for the rows labelled 2 and 3,
# so there are null values to practise on.
coffee.loc[[2,3], 'Units Sold'] = np.nan

In [None]:
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold,price,revenue
0,Monday,Espresso,25.0,3.99,99.75
1,Monday,Latte,15.0,5.99,89.85
2,Tuesday,Espresso,,3.99,
3,Tuesday,Latte,,5.99,
4,Wednesday,Espresso,35.0,3.99,139.65


In [None]:
# isna() - selects the rows where the column value is NaN
coffee[coffee['Units Sold'].isna()]

Unnamed: 0,Day,Coffee Type,Units Sold,price,revenue
2,Tuesday,Espresso,,3.99,
3,Tuesday,Latte,,5.99,


In [None]:
# fillna()
coffee['Units Sold'].fillna(coffee['Units Sold'].mean()) # Fill the NaN values in the 'Units Sold' column with the mean of the column

Unnamed: 0,Units Sold
0,25.0
1,15.0
2,34.166667
3,34.166667
4,35.0
5,25.0
6,40.0
7,30.0
8,45.0
9,35.0


In [None]:
# interpolate()
coffee['Units Sold'].interpolate() # Fill the NaN values in the 'Units Sold' column using linear interpolation

Unnamed: 0,Units Sold
0,25.0
1,15.0
2,21.666667
3,28.333333
4,35.0
5,25.0
6,40.0
7,30.0
8,45.0
9,35.0


In [None]:
# dropna() removes rows that contain NaN values
# subset - only these columns are checked for NaN when deciding which rows to drop
# here rows 2 and 3 are dropped
coffee.dropna(subset=['Units Sold']) # Use inplace=True if you want to update the coffee df

Unnamed: 0,Day,Coffee Type,Units Sold,price,revenue
0,Monday,Espresso,25.0,3.99,99.75
1,Monday,Latte,15.0,5.99,89.85
4,Wednesday,Espresso,35.0,3.99,139.65
5,Wednesday,Latte,25.0,5.99,149.75
6,Thursday,Espresso,40.0,3.99,159.6
7,Thursday,Latte,30.0,5.99,179.7
8,Friday,Espresso,45.0,3.99,179.55
9,Friday,Latte,35.0,5.99,209.65
10,Saturday,Espresso,45.0,3.99,179.55
11,Saturday,Latte,35.0,5.99,209.65


In [None]:
# notna() - selects the rows where the column value is not NaN
coffee[coffee['Units Sold'].notna()]

Unnamed: 0,Day,Coffee Type,Units Sold,price,revenue
0,Monday,Espresso,25.0,3.99,99.75
1,Monday,Latte,15.0,5.99,89.85
4,Wednesday,Espresso,35.0,3.99,139.65
5,Wednesday,Latte,25.0,5.99,149.75
6,Thursday,Espresso,40.0,3.99,159.6
7,Thursday,Latte,30.0,5.99,179.7
8,Friday,Espresso,45.0,3.99,179.55
9,Friday,Latte,35.0,5.99,209.65
10,Saturday,Espresso,45.0,3.99,179.55
11,Saturday,Latte,35.0,5.99,209.65


In [None]:
coffee
# Unchanged: coffee still contains NaN because none of the calls above updated the coffee df in place

Unnamed: 0,Day,Coffee Type,Units Sold,price,revenue
0,Monday,Espresso,25.0,3.99,99.75
1,Monday,Latte,15.0,5.99,89.85
2,Tuesday,Espresso,,3.99,
3,Tuesday,Latte,,5.99,
4,Wednesday,Espresso,35.0,3.99,139.65
5,Wednesday,Latte,25.0,5.99,149.75
6,Thursday,Espresso,40.0,3.99,159.6
7,Thursday,Latte,30.0,5.99,179.7
8,Friday,Espresso,45.0,3.99,179.55
9,Friday,Latte,35.0,5.99,209.65


## Aggregating Data

In [None]:
bios.head()

Unnamed: 0,athlete_id,name,born_date,born_city,born_region,born_country,NOC,height_cm,weight_kg,died_date
0,1,Jean-François Blanchy,1886-12-12,Bordeaux,Gironde,FRA,France,,,1960-10-02
1,2,Arnaud Boetsch,1969-04-01,Meulan,Yvelines,FRA,France,183.0,76.0,
2,3,Jean Borotra,1898-08-13,Biarritz,Pyrénées-Atlantiques,FRA,France,183.0,76.0,1994-07-17
3,4,Jacques Brugnon,1895-05-11,Paris VIIIe,Paris,FRA,France,168.0,64.0,1978-03-20
4,5,Albert Canet,1878-04-17,Wandsworth,England,GBR,France,,,1930-07-25


In [None]:
bios['born_city'].value_counts() # Count the occurrences of each unique value in the 'born_city' column

Unnamed: 0_level_0,count
born_city,Unnamed: 1_level_1
Budapest,1378
Moskva (Moscow),883
Oslo,708
Stockholm,629
Praha (Prague),600
...,...
Bodrogkisfalud,1
Ternberg,1
Klaus,1
Plaški,1


In [None]:
bios[bios['born_country']=='USA']['born_region'].value_counts().head(10) # Count the occurrences of each unique value in the 'born_region' column where the 'born_country' is 'USA'

Unnamed: 0_level_0,count
born_region,Unnamed: 1_level_1
California,1634
New York,990
Illinois,585
Pennsylvania,530
Massachusetts,530
New Jersey,381
Texas,368
Minnesota,365
Ohio,328
Michigan,319


In [None]:
bios[bios['born_country']=='USA']['born_region'].value_counts().tail(25)

Unnamed: 0_level_0,count
born_region,Unnamed: 1_level_1
Utah,91
Missouri,91
North Carolina,86
Arizona,83
New Hampshire,83
Vermont,68
Mississippi,66
Alabama,64
Kentucky,62
Tennessee,62


#### Groupby function in Pandas

In [None]:
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold,price,revenue
0,Monday,Espresso,25.0,3.99,99.75
1,Monday,Latte,15.0,5.99,89.85
2,Tuesday,Espresso,,3.99,
3,Tuesday,Latte,,5.99,
4,Wednesday,Espresso,35.0,3.99,139.65


In [None]:
coffee.groupby(['Coffee Type'])['Units Sold'].sum() # Group the data by 'Coffee Type' and calculate the sum of 'Units Sold' for each group

Unnamed: 0_level_0,Units Sold
Coffee Type,Unnamed: 1_level_1
Espresso,235.0
Latte,175.0


In [None]:
coffee.groupby(['Coffee Type'])['Units Sold'].mean() # Group the data by 'Coffee Type' and calculate the mean of 'Units Sold' for each group

Unnamed: 0_level_0,Units Sold
Coffee Type,Unnamed: 1_level_1
Espresso,39.166667
Latte,29.166667


In [None]:
# Group the data by 'Coffee Type' and 'Day'; for each group, sum 'Units Sold' and average 'price'.
# Note: sum skips NaN, so a group whose 'Units Sold' values are all NaN shows 0.0 (see Tuesday in the output).
coffee.groupby(['Coffee Type', 'Day']).agg({'Units Sold': 'sum', 'price': 'mean'})

Unnamed: 0_level_0,Unnamed: 1_level_0,Units Sold,price
Coffee Type,Day,Unnamed: 2_level_1,Unnamed: 3_level_1
Espresso,Friday,45.0,3.99
Espresso,Monday,25.0,3.99
Espresso,Saturday,45.0,3.99
Espresso,Sunday,45.0,3.99
Espresso,Thursday,40.0,3.99
Espresso,Tuesday,0.0,3.99
Espresso,Wednesday,35.0,3.99
Latte,Friday,35.0,5.99
Latte,Monday,15.0,5.99
Latte,Saturday,35.0,5.99


#### Pivot Tables

In [None]:
# Reshape coffee from long to wide: one row per 'Day', one column per
# 'Coffee Type', each cell holding the 'revenue' for that day and type.
pivot = coffee.pivot(index='Day', columns='Coffee Type', values='revenue')

In [None]:
pivot

Coffee Type,Espresso,Latte
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
Friday,179.55,209.65
Monday,99.75,89.85
Saturday,179.55,209.65
Sunday,179.55,209.65
Thursday,159.6,179.7
Tuesday,,
Wednesday,139.65,149.75


In [None]:
pivot.loc['Monday','Latte']

89.85000000000001

In [None]:
# Total revenue for each column (coffee type)
pivot.sum()

Unnamed: 0_level_0,0
Coffee Type,Unnamed: 1_level_1
Espresso,937.65
Latte,1048.25


In [None]:
pivot.sum(axis=1)

Unnamed: 0_level_0,0
Day,Unnamed: 1_level_1
Friday,389.2
Monday,189.6
Saturday,389.2
Sunday,389.2
Thursday,339.3
Tuesday,0.0
Wednesday,289.4


#### Using datetime with Groupby

In [None]:
# Convert 'born_date' to datetime, then derive the birth month and birth year
# as two new columns.
bios['born_date'] = pd.to_datetime(bios['born_date'])
bios['month_born'] = bios['born_date'].dt.month
bios['year_born'] = bios['born_date'].dt.year

# Count the names in each (year_born, month_born) group and list the
# largest groups first.
(
    bios
    .groupby(['year_born', 'month_born'])['name']
    .count()
    .reset_index()
    .sort_values(by='name', ascending=False)
)

Unnamed: 0,year_born,month_born,name
1437,1970.0,1.0,239
1461,1972.0,1.0,229
1629,1986.0,1.0,227
1497,1975.0,1.0,227
1617,1985.0,1.0,225
...,...,...,...
95,1857.0,5.0,1
96,1857.0,7.0,1
97,1857.0,8.0,1
98,1857.0,9.0,1


## Advanced Functionality

In [None]:
# shift() rank() cumsum() rolling()

In [None]:
# Work on an independent copy of the rows whose 'Coffee Type' is 'Latte'.
latte = coffee.loc[coffee['Coffee Type'].eq("Latte")].copy()
# '3day' = rolling sum of 'Units Sold' over the current row and the two rows before it.
# It is NaN until three rows are available and whenever the window contains a NaN.
latte['3day'] = latte['Units Sold'].rolling(window=3).sum()

In [None]:
latte

Unnamed: 0,Day,Coffee Type,Units Sold,price,revenue,3day
1,Monday,Latte,15.0,5.99,89.85,
3,Tuesday,Latte,,5.99,,
5,Wednesday,Latte,25.0,5.99,149.75,
7,Thursday,Latte,30.0,5.99,179.7,
9,Friday,Latte,35.0,5.99,209.65,90.0
11,Saturday,Latte,35.0,5.99,209.65,100.0
13,Sunday,Latte,35.0,5.99,209.65,105.0
