In [None]:
import io

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Query 5 - closing price over time, one line per company.
# NOTE(review): this cell is labelled "Query 5" but reads "Query 2.csv" - confirm the file mapping.

# Load the rows, parse 'date_id' with the %Y%m%d format into a new 'date' column,
# and order by date so every line is drawn chronologically.
df = (
    pd.read_csv("Query 2.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date_id'], format='%Y%m%d'))
    .sort_values('date')
)

plt.style.use('seaborn-v0_8-whitegrid')
fig, ax = plt.subplots(figsize=(14, 8))

# One marked line per 'company_id' group.
for company_id, company_rows in df.groupby('company_id'):
    ax.plot(
        company_rows['date'],
        company_rows['close_price'],
        label=f'Company {company_id}',
        marker='o',
        linestyle='-',
    )

# Title, axis labels and legend.
ax.set_title('Time Series of Closing Prices by Company', fontsize=18)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Close Price', fontsize=14)
ax.legend(title='Company ID')

# Thin dashed grid lines on both major and minor ticks.
ax.grid(True, which='both', linestyle='--', linewidth=0.5)

# Let matplotlib rotate/align the date tick labels; `fig` is the figure created above.
fig.autofmt_xdate()

plt.show()

In [None]:
# Query 9 - risk vs. return scatter plot, one labelled point per ticker.
# NOTE(review): this cell is labelled "Query 9" but reads "Query 4.csv" - confirm the file mapping.
# Requires `numpy` imported as `np` (for the marker-size scaling below).
df = pd.read_csv("Query 4.csv")

plt.style.use('seaborn-v0_8-whitegrid')
fig, ax = plt.subplots(figsize=(12, 8))

# x = 'return_volatility', y = 'avg_daily_return'.
# Marker size is sqrt('avg_volume') * 0.5: the square root compresses the spread
# between small and large volumes, and 0.5 is a purely visual scale factor.
# Points are also coloured by 'avg_daily_return' using the viridis colormap.
scatter = ax.scatter(
    df['return_volatility'],
    df['avg_daily_return'],
    s=np.sqrt(df['avg_volume']) * 0.5,
    alpha=0.7,
    cmap='viridis',
    c=df['avg_daily_return'],
)

# Put the ticker symbol next to each point.
# Iterating over the column values directly (instead of df[col][i] with a
# positional counter) keeps labels aligned with their points even when the
# DataFrame index is not the default 0..n-1 range.
for ticker, volatility, avg_return in zip(
    df['ticker'], df['return_volatility'], df['avg_daily_return']
):
    ax.annotate(
        ticker,
        (volatility, avg_return),
        textcoords="offset points",
        xytext=(5, -5),
        ha='center',
    )

# Title, axis labels and grid.
ax.set_title('Risk vs. Return of Companies', fontsize=16)
ax.set_xlabel('Return Volatility (Risk)', fontsize=12)
ax.set_ylabel('Average Daily Return', fontsize=12)
ax.grid(True)

# Colour bar for the return-based colouring, attached explicitly to this figure/axes.
cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label('Average Daily Return')

plt.show()

In [None]:
#Query 10
import pandas as pd
import matplotlib.pyplot as plt
import io

# Pie chart of each ticker's share of the summed 'avg_adj_close' values.
df = pd.read_csv("Query 3.csv")

plt.style.use('seaborn-v0_8-talk')
fig, ax = plt.subplots(figsize=(10, 8))


def my_autopct(pct):
    """Return the wedge percentage formatted to one decimal, or '' when it is 1% or less."""
    if pct > 1:
        return '%1.1f%%' % pct
    return ''


# Offset only the first wedge (by 0.1 of the radius) to highlight it; all others stay in place.
# NOTE(review): which ticker ends up first depends on the row order of the CSV - confirm it is the intended one.
explode = [0.1, *([0] * (len(df) - 1))]

pie_options = dict(
    labels=df['ticker'],
    autopct=my_autopct,
    startangle=140,
    explode=explode,
    shadow=True,
    pctdistance=0.85,  # percentage text sits at 85% of the radius from the centre
)
wedges, texts, autotexts = ax.pie(df['avg_adj_close'], **pie_options)

ax.set_title('Proportion of Total Average Adjusted Close Price by Company', fontsize=16, pad=20)

# Equal axis scaling so the pie is rendered as a circle rather than an ellipse.
ax.axis('equal')

# Legend entries show ticker, raw value and share of the total.
total = df['avg_adj_close'].sum()
labels = [
    f'{ticker}, {value:,.2f} ({value/total:.2%})'
    for ticker, value in zip(df['ticker'], df['avg_adj_close'])
]

# Anchor the legend to the right of the axes so it does not cover the pie.
ax.legend(
    wedges,
    labels,
    title="Companies",
    loc="center left",
    bbox_to_anchor=(1, 0, 0.5, 1),
)

plt.show()

In [None]:
# Query 1 - distribution of average daily returns per news sentiment.
# NOTE(review): this cell is labelled "Query 1" but reads "Query 6.csv" - confirm the file mapping.
# Requires `seaborn` imported as `sns`.
df = pd.read_csv("Query 6.csv")

sns.set_style("whitegrid")
# Explicit figure/axes so both seaborn layers and all customisation target the same Axes.
fig, ax = plt.subplots(figsize=(12, 8))

# Fixed colour per sentiment and a fixed left-to-right category order.
palette = {"positive": "g", "negative": "r", "neutral": "gray"}
order = ['positive', 'neutral', 'negative']

# NOTE(review): recent seaborn releases appear to warn when `palette` is passed
# without `hue`; once the seaborn version is pinned, consider
# hue='sentiment', hue_order=order, legend=False - confirm against the installed version.

# Layer 1: box plot (median and quartiles); boxes are made translucent so the
# individual points drawn on top remain visible.
sns.boxplot(
    x='sentiment',
    y='avg_daily_return',
    data=df,
    order=order,
    palette=palette,
    width=0.5,
    boxprops=dict(alpha=.3),
    ax=ax,
)

# Layer 2: swarm plot showing every individual observation over the boxes.
sns.swarmplot(
    x='sentiment',
    y='avg_daily_return',
    data=df,
    order=order,
    palette=palette,
    alpha=0.8,
    s=8,  # marker size
    ax=ax,
)

# Title, axis labels and tick label sizes.
ax.set_title('Distribution of Average Daily Returns by News Sentiment', fontsize=18, pad=20)
ax.set_xlabel('Sentiment', fontsize=14)
ax.set_ylabel('Average Daily Return', fontsize=14)
ax.tick_params(axis='both', labelsize=12)

# Dashed zero line as a reference between positive and negative returns.
ax.axhline(0, color='black', linestyle='--', linewidth=1)

plt.show()