# Pandas Tools Demonstration (Chipotle Dataset)

## Setup + Load Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the tab-separated Chipotle file into a DataFrame and preview the first rows.
df = pd.read_table("chipotle.tsv", sep="\t")
df.head()


In [5]:
# Print the frame summary: per-column non-null counts and dtypes, plus memory usage.
# In the saved output, choice_description has missing values and item_price is
# stored as object (text) rather than a numeric dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4622 entries, 0 to 4621
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   order_id            4622 non-null   int64 
 1   quantity            4622 non-null   int64 
 2   item_name           4622 non-null   object
 3   choice_description  3376 non-null   object
 4   item_price          4622 non-null   object
dtypes: int64(2), object(3)
memory usage: 180.7+ KB


## 1. I/O Tools

In [6]:
# Convert TSV -> CSV: read the tab-separated source file, then write it back out
# as a comma-separated file without the row-index column.
df = pd.read_csv("chipotle.tsv", delimiter="\t")
df.to_csv(path_or_buf="chipotle_export.csv", index=False)
print("Exported to chipotle_export.csv")


Exported to chipotle_export.csv


## 2. Data Cleaning

In [7]:
# Clean item_price by removing the "$" sign and converting the text to float.
# Example: "$8.49" -> 8.49
# Work on a copy so the raw frame `df` is left untouched.
df_clean = df.copy()

df_clean["item_price"] = (
    df_clean["item_price"]
    .astype(str)  # make sure the .str accessor is available
    .str.replace("$", "", regex=False)  # literal "$", not the regex end-of-string anchor
    .astype(float)
)

df_clean[["item_price"]].head()


Unnamed: 0,item_price
0,2.39
1,3.39
2,3.39
3,2.39
4,16.98


## 3. Selection & Filtering

In [8]:
# Keep only the rows whose item_price is greater than 10 and show the first ten matches.
df_clean.loc[df_clean["item_price"].gt(10)].head(10)


Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",10.98
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",11.75
13,7,1,Chicken Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",11.25
23,12,1,Chicken Burrito,"[[Tomatillo-Green Chili Salsa (Medium), Tomati...",10.98
39,19,1,Barbacoa Bowl,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",11.75
42,20,1,Chicken Bowl,"[Roasted Chili Corn Salsa, [Rice, Black Beans,...",11.25
43,20,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Pinto Beans, Chees...",11.75
45,21,1,Chicken Burrito,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",10.98
52,24,1,Chicken Burrito,"[Roasted Chili Corn Salsa (Medium), [Black Bea...",10.98


## 4. Data Transformation

In [9]:
# Add a total_price column computed as quantity x item_price, then preview it
# next to the two input columns.
# NOTE(review): the filtered preview in section 3 shows a quantity-2 Chicken Bowl
# row priced at 16.98 while quantity-1 Chicken Bowl rows are around 11, so
# item_price may already be the line total -- confirm before treating
# total_price as revenue.
df_clean["total_price"] = df_clean["quantity"].mul(df_clean["item_price"])
df_clean.loc[:, ["quantity", "item_price", "total_price"]].head()


Unnamed: 0,quantity,item_price,total_price
0,1,2.39,2.39
1,1,3.39,3.39
2,1,3.39,3.39
3,1,2.39,2.39
4,2,16.98,33.96
