In [95]:
import pandas as pd

# Load the raw input table into `df` using pandas' default comma separator.
# The following cell reads the 'Start Date', 'End Date' and 'ATC Code' columns
# from this frame, so the file has to provide them.
csv_file = 'output.csv'
df = pd.read_csv(csv_file)

In [96]:
from datetime import datetime, timedelta

# Normalise the header: remove leading/trailing whitespace from every column name
df.columns = df.columns.map(str.strip)

# Print the cleaned-up column names as a quick sanity check
print(list(df.columns))

# Function to calculate the start week of the year
def start_week_of_year(date):
    """Return the 1-based week number of ``date`` within its calendar year.

    Weeks run Monday to Sunday, and week 1 is the week that contains
    1 January (even if that week begins in the previous year).
    """
    jan_first = datetime(date.year, 1, 1)
    # Step back to the Monday of the week holding 1 January
    first_week_monday = jan_first - timedelta(days=jan_first.weekday())
    weeks_elapsed = (date - first_week_monday).days // 7
    return weeks_elapsed + 1

# Function to calculate duration in weeks, counting partial weeks as full weeks
def duration_in_weeks(start_date, end_date):
    """Return the number of weeks between two dates, rounded up.

    The day difference ``end_date - start_date`` is split into whole weeks
    and leftover days; any leftover days count as one additional week.
    """
    full_weeks, leftover_days = divmod((end_date - start_date).days, 7)
    if leftover_days:
        return full_weeks + 1
    return full_weeks

# Parse both date columns into pandas datetimes
for date_column in ('Start Date', 'End Date'):
    df[date_column] = pd.to_datetime(df[date_column])

# Derive the week in which each entry starts and how many weeks it spans
df['Starting Week'] = df['Start Date'].map(start_week_of_year)
df['Duration (weeks)'] = [
    duration_in_weeks(start, end)
    for start, end in zip(df['Start Date'], df['End Date'])
]

# Keep only the leading character of each ATC code
df['ATC Code'] = df['ATC Code'].str.get(0)

# Write the enriched table to a new CSV file (without the index column)
output_file = 'output_modified.csv'
df.to_csv(output_file, index=False)

#print("New columns added and file saved as 'output_modified.csv'")


['Title', 'Start Date', 'End Date', 'ATC Code']


In [97]:
# Re-load the table that the previous cell wrote to 'output_modified.csv'.
# No date parsing is requested here; 'Start Date' is converted with
# pd.to_datetime in the next cell.
new_csv_file = 'output_modified.csv'
df_new = pd.read_csv(new_csv_file)

#print(df_new.head(5))
#print(df_new.columns.tolist())

In [98]:
# Parse 'Start Date' so that its year component can be inspected
df_new['Start Date'] = pd.to_datetime(df_new['Start Date'])

# Keep only the rows whose start date lies in 2024
started_in_2024 = df_new['Start Date'].dt.year.eq(2024)
df_filtered = df_new.loc[started_in_2024]

# Show the first five filtered rows as a check
print(df_filtered.head(5))

                                 Title Start Date    End Date ATC Code  \
0  Estalis 50 / 250 mikrog / 24 tuntia 2024-04-19  2024-07-26        G   
1                  Scalibor vet 0.76 g 2024-04-18  2024-06-15        Q   
2                       Elidel 10 mg/g 2024-03-15  2024-06-07        D   
3                       Elidel 10 mg/g 2024-02-05  2024-05-03        D   
4           Risperidon Ratiopharm 4 mg 2024-04-19  2024-05-02        N   

   Starting Week  Duration (weeks)  
0             16                14  
1             16                 9  
2             11                12  
3              6                13  
4             16                 2  


In [99]:
# Load the weekly data file; its fields are separated by semicolons
csv_file = 'data_viikko_hva_2024.csv'
df = pd.read_csv(csv_file, sep=";")

# Keep only the rows that have a value in 'ATC_KOODI'
df = df.dropna(subset=['ATC_KOODI'])

#print(df.head(5))

In [74]:
# Take the four columns needed downstream and append a zero-filled 'SHORTAGE'
# column; assign() returns a new frame, so `df` itself is left untouched
weekly_columns = ['VUOSI', 'VIIKKO', 'VAR_N_OSTOT', 'ATC_KOODI']
df_modified = df[weekly_columns].assign(SHORTAGE=0)

# Write the result to a new semicolon-separated CSV file (without the index)
output_file = 'data_viikko_hva_2024_modified.csv'
df_modified.to_csv(output_file, sep=';', index=False)


In [87]:
# Read back the two intermediate files: the weekly table (semicolon-separated)
# and the table with 'Starting Week' / 'Duration (weeks)' (comma-separated)
df_viikko = pd.read_csv('data_viikko_hva_2024_modified.csv', sep=';')
df_output = pd.read_csv('output_modified.csv', sep=',')

# Define a function to check if the weeks overlap
def overlap(week, start_week, duration):
    """Return whether ``week`` lies in ``[start_week, start_week + duration)``.

    The range is half-open: ``start_week`` itself is included, while the
    week ``start_week + duration`` is not.
    """
    end_week = start_week + duration
    return start_week <= week < end_week

# For every row of df_viikko, count the entries of df_output that
#   * start with the same letter as the row's 'ATC_KOODI',
#   * have a 'Start Date' in the row's year ('VUOSI'), and
#   * cover the row's week: Starting Week <= VIIKKO < Starting Week + Duration
#     (the same half-open rule as overlap()).
#
# The year comparison matters because week numbers repeat every year: without
# it an entry that started in another year would be counted against the same
# week number of this year's rows.
# NOTE(review): an entry that starts late in one year and runs on into the next
# is only counted for weeks of its starting year -- confirm whether carry-over
# into the following year is required.

# Pull the df_output columns out once so that each lookup is a vectorised
# comparison instead of a Python-level apply over every df_output row.
# astype('string') turns missing ATC codes into <NA>, which never match below
# (calling .startswith on a NaN float would raise AttributeError).
shortage_letter = df_output['ATC Code'].astype('string')
shortage_year = pd.to_datetime(df_output['Start Date']).dt.year
shortage_first_week = df_output['Starting Week']
shortage_end_week = shortage_first_week + df_output['Duration (weeks)']  # exclusive

# Many weekly rows share the same (year, week, letter) combination, so each
# combination is counted once and the result reused.
counts_by_key = {}
shortage_counts = []
for year, week, atc_code in zip(df_viikko['VUOSI'], df_viikko['VIIKKO'], df_viikko['ATC_KOODI']):
    # A missing/empty ATC code has no first letter and therefore matches nothing
    # (an empty prefix would otherwise match every entry).
    first_letter = atc_code[0] if isinstance(atc_code, str) and atc_code else None
    key = (year, week, first_letter)
    if key not in counts_by_key:
        if first_letter is None:
            counts_by_key[key] = 0
        else:
            matches = (
                shortage_letter.str.startswith(first_letter, na=False)
                & (shortage_year == year)
                & (shortage_first_week <= week)
                & (week < shortage_end_week)
            )
            counts_by_key[key] = int(matches.sum())
    shortage_counts.append(counts_by_key[key])

# Store the counts in the 'SHORTAGE' column
df_viikko['SHORTAGE'] = shortage_counts

# Save the updated DataFrame
df_viikko.to_csv('data_viikko_hva_2024_modified_with_shortages.csv', index=False, sep=';')


In [100]:
# Load the semicolon-separated weekly table that holds the 'SHORTAGE' counts
csv_file = 'data_viikko_hva_2024_modified_with_shortages.csv'
df = pd.read_csv(csv_file, sep=";")

#print(df.head(20))

In [101]:
#df["SHORTAGE"].max()