In [2]:
# Example 1: Numeric Series - A numeric Series with default integer indices (0, 1, 2, 3, 4)

import pandas as pd
# Creating a Series from a list of numbers
data = [10, 20, 30, 40, 50]
series = pd.Series(data)
# Indexing by Position: retrieving the value at position 2
print(series[2])

30


In [3]:
# Example 2: Labeled Series - A Series where each data point is associated with a custom label

data = [10, 20, 30]
labels = ['A', 'B', 'C']
series = pd.Series(data, index=labels)
# Indexing by label: Retrieving the value associated with the label 'B'
print(series['B'])

20


In [4]:
# Example 3: Mixed-Type Series: This Series can hold different data types, including integers, strings, floats, and None (null)

data = [1, 'two', 3.0, None]
series = pd.Series(data)
print(series)

0       1
1     two
2     3.0
3    None
dtype: object


In [5]:
# Example 4: Data Series contains datetime objects that can be used for time series analysis.

from datetime import datetime
# Creating a Series of dates
dates = [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 3)]
series = pd.Series(dates)
print(series)

0   2023-01-01
1   2023-01-02
2   2023-01-03
dtype: datetime64[ns]


In [6]:
# Example 5: Boolean Series holds boolean values. You can use it for logical operations and filtering data
data = [True, False, True]
series = pd.Series(data)
print(series)

0     True
1    False
2     True
dtype: bool


In [7]:
## creating Dataframe objects

city_names = pd.Series(['Kajaani', 'Helsinki', 'Oulu'])
population = pd.Series([48246, 1101578, 485191])

pd.DataFrame({ 'City name': city_names, 'Population': population })

Unnamed: 0,City name,Population
0,Kajaani,48246
1,Helsinki,1101578
2,Oulu,485191


In [13]:
# Import the dataset from this url https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv
# Assign the url to a variable called url and create a dataframe named chipo.
url = "https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv"
chipo = pd.read_csv(url, sep = '\t')

In [14]:
# to display the first 10 rows of the Pandas DataFrame called chipo
chipo.head(10)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",$10.98
6,3,1,Side of Chips,,$1.69
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",$11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",$9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",$9.25


In [20]:
# to display the first 10 rows of the Pandas DataFrame called chipo
chipo.shape[0]

4622

In [17]:
chipo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4622 entries, 0 to 4621
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   order_id            4622 non-null   int64 
 1   quantity            4622 non-null   int64 
 2   item_name           4622 non-null   object
 3   choice_description  3376 non-null   object
 4   item_price          4622 non-null   object
dtypes: int64(2), object(3)
memory usage: 180.7+ KB


In [21]:
# To return the the number of rows and columns of the DataFrame.
chipo.shape

(4622, 5)

In [22]:
# To return the number of columns in the dataset
chipo.shape[1]

5

In [23]:
# To Print the name of all the columns.
chipo.columns

Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')

In [24]:
# To return the index labels used in the chipo DataFrame
chipo.index

RangeIndex(start=0, stop=4622, step=1)

In [25]:
# To return a two-dimensional NumPy array that contains the data stored in the DataFrame.
chipo.values

array([[1, 1, 'Chips and Fresh Tomato Salsa', nan, '$2.39 '],
       [1, 1, 'Izze', '[Clementine]', '$3.39 '],
       [1, 1, 'Nantucket Nectar', '[Apple]', '$3.39 '],
       ...,
       [1834, 1, 'Chicken Salad Bowl',
        '[Fresh Tomato Salsa, [Fajita Vegetables, Pinto Beans, Guacamole, Lettuce]]',
        '$11.25 '],
       [1834, 1, 'Chicken Salad Bowl',
        '[Fresh Tomato Salsa, [Fajita Vegetables, Lettuce]]', '$8.75 '],
       [1834, 1, 'Chicken Salad Bowl',
        '[Fresh Tomato Salsa, [Fajita Vegetables, Pinto Beans, Lettuce]]',
        '$8.75 ']], dtype=object)

In [26]:
# Get data types of multiple columns
column_data_types = chipo.dtypes
print(column_data_types)

order_id               int64
quantity               int64
item_name             object
choice_description    object
item_price            object
dtype: object


In [27]:
# Get data type of a single column
column_data_type = chipo['item_price'].dtype
print(column_data_type)

object


In [28]:
# chipo is the name of the dataframe
# sort_values sorts the item in ascending order based on the column item_price
chipo.sort_values("item_price")

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
261,113,1,Canned Soda,[Mountain Dew],$1.09
1805,730,1,Canned Soda,[Sprite],$1.09
1030,424,1,Canned Soda,[Diet Dr. Pepper],$1.09
3020,1201,1,Canned Soda,[Diet Dr. Pepper],$1.09
3021,1201,1,Bottled Water,,$1.09
...,...,...,...,...,...
4547,1807,1,Steak Salad Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",$9.39
4391,1752,1,Steak Salad Bowl,"[Fresh Tomato Salsa, [Black Beans, Pinto Beans...",$9.39
2600,1032,1,Steak Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Lettu...",$9.39
4241,1693,1,Steak Salad Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",$9.39


In [29]:
# Sorting in Descending Order
chipo.sort_values("item_price", ascending=False)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
2624,1042,1,Steak Salad Bowl,"[Fresh Tomato Salsa, [Black Beans, Sour Cream,...",$9.39
4419,1762,1,Steak Salad Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",$9.39
4036,1615,1,Steak Salad Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Chees...",$9.39
1825,738,1,Barbacoa Salad Bowl,"[Fresh Tomato Salsa, [Rice, Pinto Beans, Chees...",$9.39
3115,1243,1,Carnitas Salad Bowl,"[Tomatillo Green Chili Salsa, [Rice, Pinto Bea...",$9.39
...,...,...,...,...,...
3145,1254,1,Canned Soda,[Diet Dr. Pepper],$1.09
414,180,1,Canned Soda,[Dr. Pepper],$1.09
3162,1262,1,Canned Soda,[Coca Cola],$1.09
821,338,1,Canned Soda,[Coca Cola],$1.09


In [30]:
# sorting by multiple columns
chipo.sort_values(["item_price", "quantity"])

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
28,14,1,Canned Soda,[Dr. Pepper],$1.09
34,17,1,Bottled Water,,$1.09
53,24,1,Canned Soda,[Sprite],$1.09
87,38,1,Bottled Water,,$1.09
107,47,1,Canned Soda,[Dr. Pepper],$1.09
...,...,...,...,...,...
4390,1751,1,Barbacoa Salad Bowl,"[Tomatillo Green Chili Salsa, [Fajita Vegetabl...",$9.39
4391,1752,1,Steak Salad Bowl,"[Fresh Tomato Salsa, [Black Beans, Pinto Beans...",$9.39
4419,1762,1,Steak Salad Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",$9.39
4547,1807,1,Steak Salad Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",$9.39


In [31]:
# sorting by multiple columns : first ascending and then in descending order
chipo.sort_values(["item_price", "quantity"], ascending=[True, False])

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
28,14,1,Canned Soda,[Dr. Pepper],$1.09
34,17,1,Bottled Water,,$1.09
53,24,1,Canned Soda,[Sprite],$1.09
87,38,1,Bottled Water,,$1.09
107,47,1,Canned Soda,[Dr. Pepper],$1.09
...,...,...,...,...,...
4390,1751,1,Barbacoa Salad Bowl,"[Tomatillo Green Chili Salsa, [Fajita Vegetabl...",$9.39
4391,1752,1,Steak Salad Bowl,"[Fresh Tomato Salsa, [Black Beans, Pinto Beans...",$9.39
4419,1762,1,Steak Salad Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",$9.39
4547,1807,1,Steak Salad Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",$9.39


In [32]:
# selecting a single column called item_price
subset_chipo = chipo["item_price"]
print(subset_chipo)

0        $2.39 
1        $3.39 
2        $3.39 
3        $2.39 
4       $16.98 
         ...   
4617    $11.75 
4618    $11.75 
4619    $11.25 
4620     $8.75 
4621     $8.75 
Name: item_price, Length: 4622, dtype: object


In [33]:
# selecting multiple columns
subset_chipo1 = chipo[["item_price", "quantity", "item_name"]]
print(subset_chipo1)

     item_price  quantity                              item_name
0        $2.39          1           Chips and Fresh Tomato Salsa
1        $3.39          1                                   Izze
2        $3.39          1                       Nantucket Nectar
3        $2.39          1  Chips and Tomatillo-Green Chili Salsa
4       $16.98          2                           Chicken Bowl
...         ...       ...                                    ...
4617    $11.75          1                          Steak Burrito
4618    $11.75          1                          Steak Burrito
4619    $11.25          1                     Chicken Salad Bowl
4620     $8.75          1                     Chicken Salad Bowl
4621     $8.75          1                     Chicken Salad Bowl

[4622 rows x 3 columns]


In [34]:
# getting items whose order id is greater than 1000 (make sure the column's data type is integer)
subset_df = chipo[chipo['order_id'] > 1000]
print(subset_df)

      order_id  quantity           item_name  \
2519      1001         1       Steak Burrito   
2520      1001         1         Canned Soda   
2521      1002         1    Barbacoa Burrito   
2522      1002         1       Side of Chips   
2523      1003         1    Carnitas Burrito   
...        ...       ...                 ...   
4617      1833         1       Steak Burrito   
4618      1833         1       Steak Burrito   
4619      1834         1  Chicken Salad Bowl   
4620      1834         1  Chicken Salad Bowl   
4621      1834         1  Chicken Salad Bowl   

                                     choice_description item_price  
2519  [Fresh Tomato Salsa (Mild), [Pinto Beans, Rice...     $8.99   
2520                                        [Coca Cola]     $1.09   
2521  [[Tomatillo-Green Chili Salsa (Medium), Tomati...     $8.99   
2522                                                NaN     $1.69   
2523  [Tomatillo Red Chili Salsa, [Black Beans, Chee...    $11.75   
...      

In [35]:
# subsetting based on text data
subset_df = chipo[chipo['item_name'] == "Steak Burrito"]
print(subset_df)

      order_id  quantity      item_name  \
7            4         1  Steak Burrito   
9            5         1  Steak Burrito   
31          16         1  Steak Burrito   
43          20         1  Steak Burrito   
46          21         1  Steak Burrito   
...        ...       ...            ...   
4546      1807         1  Steak Burrito   
4607      1829         1  Steak Burrito   
4610      1830         1  Steak Burrito   
4617      1833         1  Steak Burrito   
4618      1833         1  Steak Burrito   

                                     choice_description item_price  
7     [Tomatillo Red Chili Salsa, [Fajita Vegetables...    $11.75   
9     [Fresh Tomato Salsa, [Rice, Black Beans, Pinto...     $9.25   
31    [[Roasted Chili Corn Salsa (Medium), Fresh Tom...     $8.99   
43    [Fresh Tomato Salsa, [Rice, Pinto Beans, Chees...    $11.75   
46    [Tomatillo-Red Chili Salsa (Hot), [Rice, Fajit...     $8.99   
...                                                 ...        ...  


In [36]:
# subsetting with mulitple conditions
subset_df1 = chipo['order_id'] > 1000
subset_df2 = chipo['item_name'] == "Steak Burrito"
subset_df3 = chipo[subset_df1 & subset_df2]
subset_df3.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
2519,1001,1,Steak Burrito,"[Fresh Tomato Salsa (Mild), [Pinto Beans, Rice...",$8.99
2561,1014,1,Steak Burrito,"[Fresh Tomato Salsa (Mild), [Cheese, Rice, Pin...",$8.99
2595,1030,1,Steak Burrito,"[Roasted Chili Corn Salsa, [Rice, Cheese]]",$9.25
2625,1043,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Fajita Vegetables,...",$11.75
2659,1056,1,Steak Burrito,"[Roasted Chili Corn Salsa (Medium), [Rice, Faj...",$8.99


In [37]:
# if you check the data type for item_price column it is object and therefore we need to change the data type to float or integer to perform calculations
print(chipo['item_price'].dtype)

object


In [43]:
# To change the data type of a column in a Pandas DataFrame to an integer type, you can use the .astype() method.
# Since item_price has a  dollar sign, lets remove it and convert to integer
chipo['item_price'] = chipo['item_price'].str.replace('$', '').astype(float)

AttributeError: Can only use .str accessor with string values!

In [41]:
# Now lets make a new column called total by multiplying 'quantity' and 'item_price'
chipo['total'] = chipo['quantity'] * chipo['item_price']

In [42]:
# lets check if we have the new column total
chipo.head(20)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price,total
0,1,1,Chips and Fresh Tomato Salsa,,2.39,2.39
1,1,1,Izze,[Clementine],3.39,3.39
2,1,1,Nantucket Nectar,[Apple],3.39,3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98,33.96
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",10.98,10.98
6,3,1,Side of Chips,,1.69,1.69
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",11.75,11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",9.25,9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",9.25,9.25


In [44]:
# first lets create a function to define the price ranges
def categorize_price(item_price):
    if item_price < 5:
        return 'Low'
    elif item_price < 10:
        return 'Medium'
    else:
        return 'High'
# We can use the .apply() method to create a new column 'price_group' based on the 'item_price' column.
chipo['price_group'] = chipo['item_price'].apply(categorize_price)
chipo.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price,total,price_group
0,1,1,Chips and Fresh Tomato Salsa,,2.39,2.39,Low
1,1,1,Izze,[Clementine],3.39,3.39,Low
2,1,1,Nantucket Nectar,[Apple],3.39,3.39,Low
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39,2.39,Low
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98,33.96,High
