In [1]:
import pandas as pd
from datetime import datetime, timedelta

# High-risk strategy screen.
#
# Reads daily stock prices, keeps the rows from the two years (730 days)
# before a fixed reference date, derives a per-row intraday return and
# high/low range, averages both per brand, and keeps the brands whose
# averages clear both thresholds below. The result is printed and saved.

# Machine-specific location of the raw price data.
CSV_PATH = r'C:\Users\MK\Desktop\Project_Uno\Database\World Stock Prices (Daily Updating)\World Stock Prices Dataset.csv'

# Where the screened table is written (relative to the working directory).
OUTPUT_PATH = 'high-risk-strategy-database.csv'

# Layout of the 'Date' column; adjust this format to match your data.
DATE_FORMAT = '%Y-%m-%d %H:%M:%S%z'

# The look-back window ends at REFERENCE_DATE and spans LOOKBACK.
REFERENCE_DATE = datetime(2023, 9, 20)
LOOKBACK = timedelta(days=365 * 2)

# Inclusive lower bounds applied to the per-brand averages (both in percent).
MIN_AVG_ROI = 0.08
MIN_AVG_VOLATILITY = 2


def to_utc(dates, date_format=DATE_FORMAT):
    """Parse date strings into a single timezone-aware (UTC) datetime Series.

    Args:
        dates: Series of strings laid out as ``date_format``.
        date_format: ``strptime``-style format, including a ``%z`` offset.

    Returns:
        A datetime Series normalised to UTC.
    """
    # utc=True converts every row to UTC, so rows carrying different offsets
    # still yield one datetime column that can be compared against a single
    # cutoff. NOTE(review): presumably the data mixes offsets (e.g. DST);
    # confirm against the CSV.
    return pd.to_datetime(dates, format=date_format, utc=True)


def add_daily_metrics(prices):
    """Return a copy of ``prices`` with 'Daily_Return' and 'Volatility' added.

    Args:
        prices: DataFrame with numeric 'Open', 'Close', 'High', 'Low' columns.

    Returns:
        A new DataFrame; the input is left untouched. Both new columns are
        percentages:
        * 'Daily_Return' -- open-to-close change relative to Open.
        * 'Volatility'   -- high-to-low range relative to Low.
    """
    # Work on a copy so the caller's frame (possibly a slice) is not mutated.
    enriched = prices.copy()
    enriched['Daily_Return'] = (enriched['Close'] - enriched['Open']) / enriched['Open'] * 100
    enriched['Volatility'] = (enriched['High'] - enriched['Low']) / enriched['Low'] * 100
    return enriched


def average_by_brand(prices):
    """Average 'Daily_Return' and 'Volatility' for each 'Brand_Name'.

    Returns:
        DataFrame with columns 'Brand_Name', 'AVG ROI' (the mean of
        'Daily_Return') and 'Volatility' (the mean of 'Volatility').
    """
    means = prices.groupby('Brand_Name', as_index=False)[['Daily_Return', 'Volatility']].mean()
    return means.rename(columns={'Daily_Return': 'AVG ROI'})


def select_high_risk(averages, min_avg_roi=MIN_AVG_ROI, min_avg_volatility=MIN_AVG_VOLATILITY):
    """Keep brands meeting both thresholds, best average return first.

    Args:
        averages: output of ``average_by_brand``.
        min_avg_roi: inclusive lower bound on 'AVG ROI'.
        min_avg_volatility: inclusive lower bound on 'Volatility'.

    Returns:
        A new DataFrame sorted by 'AVG ROI' descending, with 'AVG ROI' and
        'Volatility' rounded to two decimals. Filtering happens on the
        unrounded values; rounding is applied last.
    """
    qualifies = (averages['AVG ROI'] >= min_avg_roi) & (averages['Volatility'] >= min_avg_volatility)
    ranked = averages[qualifies].sort_values(by='AVG ROI', ascending=False)
    return ranked.round({'AVG ROI': 2, 'Volatility': 2})


# The guard is true when this runs as a notebook cell or as a script, so the
# names below are still created as globals there; it only keeps the file I/O
# from firing when the helpers above are imported.
if __name__ == "__main__":
    # Load the CSV file and parse its 'Date' column
    df = pd.read_csv(CSV_PATH)
    df['Date'] = to_utc(df['Date'])

    # Timezone-aware cutoff built directly in UTC, so it does not depend on
    # the contents (or existence) of the first data row
    cutoff_date = pd.Timestamp(REFERENCE_DATE - LOOKBACK).tz_localize('UTC')

    # Keep only the date-times on or after the cutoff and add the metrics
    filtered_df = add_daily_metrics(df[df['Date'] >= cutoff_date])

    # Per-brand averages, then the high-return / high-volatility subset
    averages = average_by_brand(filtered_df)
    filtered_averages = select_high_risk(averages)

    # Display the resulting DataFrame
    print(filtered_averages)

    # Save the screened table to a new CSV file
    filtered_averages.to_csv(OUTPUT_PATH, index=False)


    Brand_Name  AVG ROI  Volatility
13    coinbase     0.22        8.25
3       airbnb     0.18        4.30
36      nvidia     0.15        4.50
8        apple     0.13        2.33
55        uber     0.11        4.38
29  mastercard     0.10        2.32
26    logitech     0.10        2.30
18       fedex     0.09        2.44
43      roblox     0.09        6.71
