# Ex2 - Getting and Knowing your Data

This time we are going to pull data directly from the internet.
Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.

### Step 1. Import the necessary libraries

In [None]:
# pandas provides read_csv and the DataFrame that the later cells operate on.
import pandas as pd
# numpy is imported here but is not referenced by the cells visible in this notebook.
import numpy as np

# Simple confirmation that the import cell ran to completion.
print("Libraries imported.")


### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). 

In [None]:
# URL for the Chipotle dataset (tab-separated values)
url = "https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv"

# Load the raw dataset into the DataFrame used by every later step.
chipo = pd.read_csv(url, sep='\t')

# NOTE: the rows are deliberately left untouched here.
# Dropping rows with missing values or dropping duplicate rows would remove
# real order lines (presumably items that simply have no choice_description,
# or the same item listed twice), which would change the observation count,
# quantities and revenue that the following steps ask about.
print("Data loaded and ready for processing.")


### Step 3. Assign it to a variable called chipo.

In [None]:
# Nothing to compute here: the Step 2 cell already bound the result of
# pd.read_csv to the name 'chipo', so this cell only prints a confirmation.
print("Dataset is assigned to variable 'chipo'.")


### Step 4. See the first 10 entries

In [None]:
# Show the leading ten rows; as the cell's last expression the result is
# rendered by the notebook.
chipo.head(n=10)


### Step 5. What is the number of observations in the dataset?

In [None]:

# One observation per DataFrame row, so the row count is the answer.
num_observations = len(chipo)
print("Number of observations:", num_observations)





### Step 6. What is the number of columns in the dataset?

In [None]:
# Count the column labels of the DataFrame.
num_columns = len(chipo.columns)
print("Number of columns:", num_columns)


### Step 7. Print the name of all the columns.

In [None]:
# Emit a heading line followed by the column labels as a plain Python list.
print("Column names:", chipo.columns.tolist(), sep="\n")


### Step 8. How is the dataset indexed?

In [None]:
# Emit a heading line followed by the repr of the DataFrame's row index.
print("Dataset index:", chipo.index, sep="\n")


### Step 9. Which was the most-ordered item? 

In [None]:
# "Most ordered" is measured in units, not in rows: a single row can carry a
# quantity greater than one, so sum 'quantity' per item and take the item with
# the largest total (counting rows alone would ignore the quantity column).
most_ordered_item = chipo.groupby('item_name')['quantity'].sum().idxmax()
print("Most-ordered item:", most_ordered_item)


### Step 10. For the most-ordered item, how many items were ordered?

In [None]:
# Total units ordered of the item identified in Step 9: select that item's
# rows and add up their 'quantity' values (a row count would under-report
# whenever a row has quantity > 1).
most_ordered_count = chipo.loc[chipo['item_name'] == most_ordered_item, 'quantity'].sum()
print("Number of items ordered for", most_ordered_item, ":", most_ordered_count)


### Step 11. What was the most ordered item in the choice_description column?

In [None]:
# Sum 'quantity' per choice_description and take the description with the
# largest total, so that rows with quantity > 1 are weighted correctly.
# Rows whose choice_description is missing are left out automatically, because
# groupby excludes NaN group keys by default.
most_common_choice = chipo.groupby('choice_description')['quantity'].sum().idxmax()
print("Most common choice description:", most_common_choice)


### Step 12. How many items were ordered in total?

In [None]:
# Total units ordered = sum over every row of the 'quantity' column.
total_items_ordered = chipo.loc[:, 'quantity'].sum()
print("Total items ordered:", total_items_ordered)


### Step 13. Turn the item price into a float

In [None]:
# Convert the 'item_price' column from a string (with '$') to a float.
# The isinstance guard makes the cell safe to re-run: once the column already
# holds floats, calling .replace on a value would raise AttributeError, so
# non-string values are passed straight to float() instead.
chipo['item_price'] = chipo['item_price'].apply(
    lambda x: float(x.replace('$', '')) if isinstance(x, str) else float(x)
)
print("Item price converted to float.")


#### Step 13.a. Check the item price type

In [None]:
# Report the dtype currently held by 'item_price'.
# NOTE(review): the Step 13 cell above already assigns floats to this column,
# so when the cells are run top to bottom this shows the converted dtype even
# though the label says "Before conversion".
print("Before conversion, item_price type:", chipo.dtypes['item_price'])


#### Step 13.b. Create a lambda function and change the type of item price

In [None]:
# Turn every 'item_price' value into a float with a lambda.
# String values have any '$' stripped first; values that are not strings are
# handed to float() unchanged.
chipo['item_price'] = chipo['item_price'].apply(
    lambda x: float(x.replace('$','') if isinstance(x, str) else x)
)
print("Conversion of item_price completed.")


#### Step 13.c. Check the item price type

In [None]:
# Confirm the outcome of the conversion by printing the column's dtype.
print("After conversion, item_price type:", chipo.dtypes['item_price'])


### Step 14. How much was the revenue for the period in the dataset?

In [None]:
# Total revenue: multiply quantity by item_price row by row, then add up the
# per-row products.
# NOTE(review): this assumes item_price is a per-unit price. If item_price is
# already the total for the row (i.e. already scaled by quantity), the product
# overstates revenue -- confirm against the data.
total_revenue = chipo['quantity'].mul(chipo['item_price']).sum()
print("Total revenue for the period: $", total_revenue)


### Step 15. How many orders were made in the period?

In [None]:
# An order can span several rows, so count distinct (non-missing) order_id
# values rather than rows.
num_orders = len(chipo['order_id'].dropna().unique())
print("Total number of orders:", num_orders)


### Step 16. What is the average revenue amount per order?

In [None]:

# Mean revenue per order: the total revenue from Step 14 divided by the
# number of distinct orders from Step 15. Only the printed value is rounded
# to two decimals; the stored value keeps full precision.
avg_revenue_per_order = total_revenue / num_orders
print(
    "Average revenue per order: $",
    round(avg_revenue_per_order, 2),
)





### Step 17. How many different items are sold?

In [None]:
# Number of distinct (non-missing) values in the 'item_name' column.
num_unique_items = len(chipo['item_name'].dropna().unique())
print("Different items sold:", num_unique_items)
