# Create Data

In [1]:
import requests
import pandas as pd
import numpy as np
import random
from faker import Faker
import holidays
from datetime import datetime

# Set location to Germany
faker = Faker('de_DE')
german_holidays = holidays.Germany()

# OpenWeatherMap API
API_KEY = 'ffb423866f384a837117cf910ec31607'
BASE_URL = 'http://api.openweathermap.org/data/2.5/weather'

#Fetch the current temperature for a given city from OpenWeatherMap.
def get_weather(city): 
    url = f"{BASE_URL}?q={city},DE&appid={API_KEY}"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        # Convert Kelvin to Celsius
        temperature = data['main']['temp'] - 273.15
        return round(temperature, 2)
    else:
        return 0

# Generate a list of cities 
def get_cities(number):
    cities_set = set()
    while len(cities_set) < number:
        cities_set.add(faker.city())
    return list(cities_set)

# Generate dates
def get_dates(number_dates, start_date):
    start_date = pd.to_datetime(start_date)
    return [start_date + pd.Timedelta(days=x) for x in range(number_dates)]

# Generate a DataFrame with random data
def generate_data(number_cities, number_dates, start_date):
    cities = get_cities(number_cities)
    dates = pd.to_datetime(get_dates(number_dates, start_date) )
    
    
    # Generating the DataFrame
    df = pd.DataFrame({
        'day': dates.day,
        'month': dates.month,
        'year': dates.year,
        'is_holiday': [date in german_holidays for date in dates],
        'location': [random.choice(cities) for _ in dates],
    })
    
    # Assigning temperatures to each row in the DataFrame
    df['temperature'] = df['location'].apply(get_weather)

    return df

def generate_product_demands(data):
    df = data.copy()
    
    # Assuming different random base demands for each product
    base_demands = [random.randint(50, 100) for _ in range(6)]
    
    # Optimal temperature and sensitivity
    optimal_temp = 25
    temp_sensitivity = 1.5
    
    # Holiday demand increase percentage
    holiday_multiplier = 1.2
    
    # Assign random location multipliers for all distinct locations
    unique_locations = df['location'].unique()
    location_multipliers = {loc: random.uniform(0.8, 1.2) for loc in unique_locations}
    
    # Initialize demand columns for each product
    for i in range(6):
        # Calculate base demand adjusted for holiday
        df[f'product_{i+1}_demand'] = df['is_holiday'].apply(lambda x: base_demands[i] * holiday_multiplier if x else base_demands[i])
        
        # Adjust for temperature
        df[f'product_{i+1}_demand'] += (np.abs(df['temperature'] - optimal_temp) * -temp_sensitivity)
        
        # Adjust for location multiplier
        df[f'product_{i+1}_demand'] *= df['location'].apply(lambda loc: location_multipliers[loc])

        # Round to nearest integer
        df[f'product_{i+1}_demand'] = df[f'product_{i+1}_demand'].apply(round)
    
    return df[[ 'product_1_demand', 'product_2_demand', 'product_3_demand', 'product_4_demand', 'product_5_demand', 'product_6_demand']]




In [2]:
# Generate data
feature_data = generate_data(10, 1000, '2020-01-01')
target_data = generate_product_demands(feature_data)

df = pd.concat([feature_data, target_data], axis=1)



print(df.head(10))

   day  month  year  is_holiday                  location  temperature  \
0    1      1  2020        True                 Spremberg         3.80   
1    2      1  2020       False            Recklinghausen         7.36   
2    3      1  2020       False            Recklinghausen         7.36   
3    4      1  2020       False                 Spremberg         3.80   
4    5      1  2020       False                Schwandorf         5.15   
5    6      1  2020       False                Schwandorf         5.15   
6    7      1  2020       False  Rothenburg ob der Tauber         4.79   
7    8      1  2020       False                   Wolfach         6.59   
8    9      1  2020       False                     Wesel         7.84   
9   10      1  2020       False                 Spremberg         3.80   

   product_1_demand  product_2_demand  product_3_demand  product_4_demand  \
0                54                38                53                35   
1                67            

In [5]:
df.to_csv('Data_test_Multi_Raw/data.csv', index=False)

# Visualize Data

In [4]:
df.to_csv('Data_Test_v2/data.csv, index=False)


SyntaxError: unterminated string literal (detected at line 1) (2822351141.py, line 1)