In [1]:
import numpy as np
import pandas as pd

In [4]:
# Load the CSV file
port_df = pd.read_csv('predictions_output_1403_test_final.csv', index_col=[0, 1])  # Assuming the file has multi-index (Company, pdate)

# Create an empty DataFrame to store the final combined data
combined_df = pd.DataFrame()

# Filter the DataFrame for the year 1403
filtered_df = port_df.xs(1403, level='pdate').copy()

# Create a new DataFrame to repeat each row 12 times (once for each month)
expanded_df = filtered_df.loc[filtered_df.index.repeat(6)].copy()

# Generate pdate column with 'year-month' format
expanded_df['pdate'] = [f"1403-{str(month).zfill(2)}" for month in range(1, 7)] * len(filtered_df)

# Append the expanded DataFrame to the combined DataFrame
combined_df = pd.concat([combined_df, expanded_df])

# Reset the index if necessary
combined_df.reset_index(inplace=True)

print("Done")


Done


In [5]:
combined_df.head(24)

Unnamed: 0,Company,predicted_target,pdate
0,PAKS,-0.198042,1403-01
1,PAKS,-0.198042,1403-02
2,PAKS,-0.198042,1403-03
3,PAKS,-0.198042,1403-04
4,PAKS,-0.198042,1403-05
5,PAKS,-0.198042,1403-06
6,ALMR,-0.452371,1403-01
7,ALMR,-0.452371,1403-02
8,ALMR,-0.452371,1403-03
9,ALMR,-0.452371,1403-04


In [6]:
combined_df.tail(24)

Unnamed: 0,Company,predicted_target,pdate
486,KNOOR,0.573662,1403-01
487,KNOOR,0.573662,1403-02
488,KNOOR,0.573662,1403-03
489,KNOOR,0.573662,1403-04
490,KNOOR,0.573662,1403-05
491,KNOOR,0.573662,1403-06
492,KHZAMYA,0.422561,1403-01
493,KHZAMYA,0.422561,1403-02
494,KHZAMYA,0.422561,1403-03
495,KHZAMYA,0.422561,1403-04


In [7]:
# Load the company names
names = pd.read_csv('name.csv')["name"]

# Function to calculate mean F9 for each company and year-month
def calculate_mean_f9_for_month(company_file):
    company_df = pd.read_csv(company_file)
    
    # Extract year-month from the pdate column
    company_df['year_month'] = company_df['pdate'].str.slice(0, 7)
    
    # Clean the F9 column: remove nulls and replace non-numeric values with NaN
    company_df['F9'] = pd.to_numeric(company_df['F9'], errors='coerce')
    
    # Calculate the mean F9 for each year-month
    mean_f9_by_month = company_df.groupby('year_month')['F9'].mean().to_dict()
    
    return mean_f9_by_month

# Loop through each company to calculate and assign the mean F9 for each year-month
for company in names:
    company_file = f'data/{company}.csv'
    
    # Calculate the mean F9 for each year-month for the current company
    mean_f9_for_month = calculate_mean_f9_for_month(company_file)
    
    # Filter the combined DataFrame for the current company
    company_mask = (combined_df['Company'] == company)
    
    # Map the calculated mean F9 values to the corresponding rows in combined_df
    combined_df.loc[company_mask, 'Mean_F9'] = combined_df.loc[company_mask, 'pdate'].map(mean_f9_for_month)
    


print("Done")


Done


In [8]:
combined_df.head(24)

Unnamed: 0,Company,predicted_target,pdate,Mean_F9
0,PAKS,-0.198042,1403-01,23763600000000.0
1,PAKS,-0.198042,1403-02,22254800000000.0
2,PAKS,-0.198042,1403-03,19548120000000.0
3,PAKS,-0.198042,1403-04,20189480000000.0
4,PAKS,-0.198042,1403-05,20134540000000.0
5,PAKS,-0.198042,1403-06,21839740000000.0
6,ALMR,-0.452371,1403-01,6425625000000.0
7,ALMR,-0.452371,1403-02,6387143000000.0
8,ALMR,-0.452371,1403-03,5512222000000.0
9,ALMR,-0.452371,1403-04,5717105000000.0


In [7]:
combined_df.tail(24)

Unnamed: 0,Company,predicted_target,pdate,Mean_F9
480,KNOOR,0.224888,1403-01,313171900000000.0
481,KNOOR,0.224888,1403-02,314549700000000.0
482,KNOOR,0.224888,1403-03,298396200000000.0
483,KNOOR,0.224888,1403-04,251801600000000.0
484,KNOOR,0.224888,1403-05,244839000000000.0
485,KNOOR,0.224888,1403-06,234551600000000.0
486,KHZAMYA,0.305726,1403-01,162349800000000.0
487,KHZAMYA,0.305726,1403-02,170933600000000.0
488,KHZAMYA,0.305726,1403-03,152007900000000.0
489,KHZAMYA,0.305726,1403-04,152323500000000.0


In [9]:
# Function to calculate mean percentage change in F6 for a specific year and month
def calculate_mean_percentage_change_for_year_month(company_file, target_year_month):
    # Read the company's CSV file
    company_df = pd.read_csv(company_file)
    
    # Extract year-month from the pdate column
    company_df['year_month'] = company_df['pdate'].str.slice(0, 7)
    
    # Filter the data for the target year and month
    filtered_df = company_df[company_df['year_month'] == target_year_month]
    
    # Convert 'date' to datetime format for sorting
    filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
    
    # Drop rows with invalid 'date' values
    filtered_df = filtered_df.dropna(subset=['date'])
    
    # Convert F6 to numeric (handle invalid values such as '-') for filtered_df
    filtered_df['F6'] = pd.to_numeric(filtered_df['F6'], errors='coerce')
    
    # Drop rows where F6 is NaN
    filtered_df = filtered_df.dropna(subset=['F6'])
    
    if filtered_df.empty:
        return None  # If no valid data is left after filtering

    # Sort by 'date' to get the first and last day of the target year-month
    filtered_df = filtered_df.sort_values(by='date')
    
    # Get F6 values for the first and last day
    first_day_f6 = filtered_df.iloc[0]['F6']
    last_day_f6 = filtered_df.iloc[-1]['F6']
    
    # Ensure that both first_day_f6 and last_day_f6 are valid numbers
    if pd.isna(first_day_f6) or pd.isna(last_day_f6) or first_day_f6 == 0:
        return None
    
    # Calculate the percentage change
    percentage_change = ((last_day_f6 - first_day_f6) / first_day_f6) * 100
    
    return percentage_change

# Define the specific year and months for which we want to calculate changes
target_year = 1403
months = [f"{m:02d}" for m in range(1, 7)]  # From '01' to '12'

# Loop through each company and each month to calculate and assign the mean percentage change in F6
for company in names:
    company_file = f'data/{company}.csv'
    
    for month in months:
        # Create the year-month string
        year_month = f"{target_year}-{month}"
        
        # Calculate the mean percentage change in F6 for the current year-month
        mean_pct_change = calculate_mean_percentage_change_for_year_month(company_file, year_month)

        # Filter the combined DataFrame for the current company and year-month
        company_mask = (combined_df['Company'] == company) & (combined_df['pdate'].str.slice(0, 7) == year_month)

        # Assign the calculated mean percentage change to the corresponding rows in combined_df
        combined_df.loc[company_mask, 'Mean_Pct_Change_F6'] = mean_pct_change

print("Done")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

Done


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['date'] = pd.to_datetime(filtered_df['date'

In [10]:
combined_df.tail(24)

Unnamed: 0,Company,predicted_target,pdate,Mean_F9,Mean_Pct_Change_F6
486,KNOOR,0.573662,1403-01,313171900000000.0,0.758534
487,KNOOR,0.573662,1403-02,314549700000000.0,-3.87985
488,KNOOR,0.573662,1403-03,298396200000000.0,1.041667
489,KNOOR,0.573662,1403-04,251801600000000.0,-18.427835
490,KNOOR,0.573662,1403-05,244839000000000.0,-2.369668
491,KNOOR,0.573662,1403-06,234551600000000.0,-2.750809
492,KHZAMYA,0.422561,1403-01,162349800000000.0,-10.294118
493,KHZAMYA,0.422561,1403-02,170933600000000.0,-3.275356
494,KHZAMYA,0.422561,1403-03,152007900000000.0,-8.206547
495,KHZAMYA,0.422561,1403-04,152323500000000.0,0.347567


In [11]:
combined_df.to_csv("predict_1403_test_final.csv")

In [12]:
def calculate_advanced_portfolio_weights(df, feature_col, exponent=2):
    # Step 1: Find the minimum value in the feature column and shift values if necessary
    min_value = df[feature_col].min()
    shift_constant = abs(min_value) if min_value < 0 else 0
    df['shifted_feature'] = df[feature_col] + shift_constant
    
    # Step 2: Apply exponential weighting
    df['exp_weight'] = np.power(df['shifted_feature'], exponent)
    
    # Step 3: Normalize the weights
    df['weight'] = df['exp_weight'] / df['exp_weight'].sum()
    
    # Drop the intermediate columns used in calculations
    df.drop(columns=['shifted_feature', 'exp_weight'], inplace=True)
    
    # Step 4: Calculate Mean_F9 proportion and create a new column
    df['Mean_F9_Proportion'] = df['Mean_F9'] / df['Mean_F9'].sum()
    
    # Step 5: Create a new column which is Pct_Change_F6 multiplied by the Mean_F9 proportion
    df['Pct_Change_F6_Mean_F9'] = df['Mean_Pct_Change_F6'] * df['Mean_F9_Proportion']
    
    # Step 6: Create a new column which is Pct_Change_F6 multiplied by the weight
    df['Pct_Change_F6_Weight'] = df['Mean_Pct_Change_F6'] * df['weight']
    
    # Step 7: Calculate the sums of the new columns
    sum_pct_change_mean_f9 = df['Pct_Change_F6_Mean_F9'].sum()
    sum_pct_change_weight = df['Pct_Change_F6_Weight'].sum()
    
    return df, sum_pct_change_mean_f9, sum_pct_change_weight

# Define the specific year and months for which we want to calculate changes
target_year = 1403
months = [f"{m:02d}" for m in range(1, 7)]  # From '01' to '12'

# Create a new DataFrame to store cumulative results
cumulative_results = []

# Create a list to store results for each month
monthly_results = []

# Loop through each month within the year
for month in months:
    # Create the year-month string
    year_month = f"{target_year}-{month}"
    
    # Filter the combined_df for the specific year-month
    filtered_df = combined_df[combined_df['pdate'] == year_month]
    
    if filtered_df.empty:
        continue  # Skip if no data for that specific year-month

    # Calculate portfolio weights and perform the additional calculations
    detailed_df, sum_pct_change_mean_f9, sum_pct_change_weight = calculate_advanced_portfolio_weights(filtered_df, 'predicted_target', exponent=3)

     # Add year_month as a new column to the detailed DataFrame
    detailed_df['year_month'] = year_month
    
    # Store the detailed DataFrame for the month
    monthly_results.append(detailed_df)
   
    # Store the cumulative results in a list
    cumulative_results.append({
        'year_month': year_month,
        'sum_pct_change_mean_f9': sum_pct_change_mean_f9,
        'sum_pct_change_weight': sum_pct_change_weight
    })

# Convert the results into a DataFrame for easier plotting
cumulative_df = pd.DataFrame(cumulative_results)

# Display the first few rows of the cumulative DataFrame
print(cumulative_df.head())



  year_month  sum_pct_change_mean_f9  sum_pct_change_weight
0    1403-01               -1.054832              -3.003251
1    1403-02               -8.735924              -7.899230
2    1403-03               -5.333193              -5.870000
3    1403-04                2.634509               3.807363
4    1403-05               -8.833099              -6.733953


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['shifted_feature'] = df[feature_col] + shift_constant
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['exp_weight'] = np.power(df['shifted_feature'], exponent)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['weight'] = df['exp_weight'] / df['exp_weight'].sum()
A value is trying to be set on

In [13]:
# Combine all monthly results into a single DataFrame
all_monthly_data = pd.concat(monthly_results, ignore_index=True)

# Sort the DataFrame by 'pdate' in ascending order, then by 'weight' in descending order
all_monthly_data_sorted = all_monthly_data.sort_values(by=['pdate', 'weight'], ascending=[True, False])

# Print the sorted DataFrame
# Display the first few rows of the combined DataFrame with weights
all_monthly_data_sorted.head()

Unnamed: 0,Company,predicted_target,pdate,Mean_F9,Mean_Pct_Change_F6,weight,Mean_F9_Proportion,Pct_Change_F6_Mean_F9,Pct_Change_F6_Weight,year_month
79,JAMPILEN,1.079504,1403-01,357575000000000.0,2.714984,0.079783,0.011673,0.031691,0.216609,1403-01
44,KKHAK,0.751683,1403-01,54173440000000.0,-9.831985,0.043862,0.001768,-0.017387,-0.431249,1403-01
75,SARBIL,0.669713,1403-01,32523300000000.0,6.728111,0.036994,0.001062,0.007143,0.248899,1403-01
61,SETRAN,0.604147,1403-01,103646900000000.0,8.035714,0.032047,0.003383,0.027188,0.257524,1403-01
53,NOORI,0.591,1403-01,1536812000000000.0,10.209887,0.031112,0.050167,0.512202,0.317649,1403-01


In [14]:
all_monthly_data_sorted.to_csv("portfolio_1403_test_final.csv")

In [15]:
cumulative_df.to_csv("cumulative_df_1403_test_final.csv")