In [13]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the Chipotle data into a DataFrame with error handling
try:
    chipo = pd.read_csv('data.csv', delimiter=',', quotechar='"', on_bad_lines='skip')
except pd.errors.ParserError as e:
    print(f"Error parsing CSV: {e}")
    # Handle the error or exit
    exit()

# Review the first 10 entries
print(chipo.head(10))

# Check if 'item_name' column exists
if 'item_name' in chipo.columns:
    # Create a histogram of the top 5 items bought
    top_items = chipo['item_name'].value_counts().head(5)
    top_items.plot(kind='bar')
    plt.xlabel('Items')
    plt.ylabel('Number of Times Ordered')
    plt.title('Top 5 Most Ordered Items')
    plt.show()
else:
    print("Column 'item_name' does not exist in the DataFrame.")

# Check if 'item_price' column exists
if 'item_price' in chipo.columns:
    # Convert item_price to float
    chipo['item_price'] = chipo['item_price'].str.replace('$', '').astype(float)

    # Group by order_id and sum the quantities and prices
    order_summary = chipo.groupby('order_id').sum()

    # Create the scatter plot
    plt.scatter(order_summary['item_price'], order_summary['quantity'])
    plt.xlabel('Order Price')
    plt.ylabel('Number of Items Ordered')
    plt.title('Number of Items Ordered per Order Price')
    plt.show()

    # Formulate a question and provide an answer using a suitable graph
    # Question: What is the distribution of order prices?
    order_summary['item_price'].plot(kind='hist', bins=20)
    plt.xlabel('Order Price')
    plt.ylabel('Frequency')
    plt.title('Distribution of Order Prices')
    plt.show()
else:
    print("Column 'item_price' does not exist in the DataFrame.")


  order_id\tquantity\titem_name\tchoice_description\titem_price
0   1\t1\tChips and Fresh Tomato Salsa\tNULL\t$2.39            
1                   1\t1\tIzze\t[Clementine]\t$3.39            
2            1\t1\tNantucket Nectar\t[Apple]\t$3.39            
3  1\t1\tChips and Tomatillo-Green Chili Salsa\tN...           
4                  3\t1\tSide of Chips\tNULL\t$1.69            
5            5\t1\tChips and Guacamole\tNULL\t$4.45            
6            7\t1\tChips and Guacamole\tNULL\t$4.45            
7  8\t1\tChips and Tomatillo-Green Chili Salsa\tN...           
8                9\t2\tCanned Soda\t[Sprite]\t$2.18            
9           10\t1\tChips and Guacamole\tNULL\t$4.45            
Column 'item_name' does not exist in the DataFrame.
Column 'item_price' does not exist in the DataFrame.


In [15]:
import pandas as pd
import matplotlib.pyplot as plt

# Assuming investor_data is already loaded and has a 'Date' column and 'Total Equity' column

# Check if 'Date' column exists
if 'Date' in investor_data.columns:
    # Change the ‘Date’ column to datetime format
    investor_data['Date'] = pd.to_datetime(investor_data['Date'])

    # Set ‘Date’ as the index of the DataFrame
    investor_data.set_index('Date', inplace=True)

    # Change the frequency of the ‘investor_data’ DataFrame to yearly
    yearly_data = investor_data.resample('Y').sum()

    # Formulate a question and provide an answer
    # Question: What is the trend of 'Total Equity' over the years?
    # Answer: Plot the yearly data to visualize the trend
    yearly_data['Total Equity'].plot(title='Total Equity Over the Years')
    plt.xlabel('Year')
    plt.ylabel('Total Equity')
    plt.show()
else:
    print("Column 'Date' does not exist in the DataFrame.")


Column 'Date' does not exist in the DataFrame.
