# Data Generator
Lacking access to data sources such as a Bloomberg Terminal, I have built this code to generate stock price data. It gives me simple test data with fictitious features such as rallies, crashes, recoveries, stock splits, and dividends, as well as realistic variations such as abnormalities and deviations between Open, High, Low, and Close.

In [17]:
# Import required libraries
import pandas as pd
import numpy as np
import os

# Output directory for every generated CSV file.
# exist_ok=True makes the call idempotent and avoids the check-then-create
# race of testing os.path.exists() before calling os.makedirs().
directory = "Market_Data"
os.makedirs(directory, exist_ok=True)

# Upwards Trend

In [8]:
# Synthetic single-stock dataset with a steady upward trend, saved as CSV.

# Business-day calendar for the simulated period
date_range = pd.date_range(start="2020-01-01", end="2024-11-20", freq='B')
num_days = len(date_range)

# Underlying trend: linear ramp from 50 to 500 plus Gaussian noise (std 3)
upward_prices = np.linspace(50, 500, num_days) + np.random.normal(0, 3, num_days)

# Each quote is the trend price scaled by its own independent random factor
open_prices = upward_prices * np.random.uniform(0.98, 1.02, num_days)
high_prices = upward_prices * np.random.uniform(1.00, 1.03, num_days)
low_prices = upward_prices * np.random.uniform(0.95, 0.99, num_days)
close_prices = upward_prices * np.random.uniform(0.97, 1.01, num_days)

# The factor ranges overlap (Open can reach 1.02x while High can be as low as
# 1.00x; Close can be 0.97x while Low can be 0.99x), so the raw draws do not
# guarantee Low <= Open/Close <= High. Enforce that ordering explicitly.
high_prices = np.maximum(high_prices, np.maximum(open_prices, close_prices))
low_prices = np.minimum(low_prices, np.minimum(open_prices, close_prices))

data_upward_trend = {
    "Date": date_range,
    "Open": open_prices,
    "High": high_prices,
    "Low": low_prices,
    "Close": close_prices,
    # Adj Close is drawn independently of Close, as in the other columns
    "Adj Close": upward_prices * np.random.uniform(0.97, 1.01, num_days),
    "Volume": np.random.randint(500_000, 5_000_000, num_days),
}

# Build the table and round every price column to 2 decimals
stock_data_upward = pd.DataFrame(data_upward_trend)
stock_data_upward = stock_data_upward.round({
    "Open": 2, "High": 2, "Low": 2, "Close": 2, "Adj Close": 2
})

# Save to CSV inside the Market_Data directory
file_path_upward = os.path.join(directory, "fictive_stock_data_upward_trend.csv")
stock_data_upward.to_csv(file_path_upward, index=False)

print(f"Dataset generated and saved to {file_path_upward}")

Dataset generated and saved to Market_Data/fictive_stock_data_upward_trend.csv


# Upwards Trend + Crash & Recovery

In [10]:
# Synthetic single-stock dataset: upward trend, a crash during 2022, then a
# linear recovery back to 500. Saved as CSV.

# Business-day calendar for the simulated period
date_range = pd.date_range(start="2020-01-01", end="2024-11-20", freq='B')
num_days = len(date_range)

# Base trend: linear ramp from 50 to 500
prices = np.linspace(50, 500, num_days)

# Crash: scale the 2022 prices by a factor that slides from 1.0 down to 0.5
crash_start = pd.Timestamp("2022-01-01")
crash_end = pd.Timestamp("2022-12-31")
crash_mask = (date_range >= crash_start) & (date_range <= crash_end)
prices[crash_mask] *= np.linspace(1, 0.5, crash_mask.sum())

# Recovery: replace everything after the crash with a straight line from the
# last crash price up to 500
recovery_start = pd.Timestamp("2023-01-01")
recovery_mask = (date_range > crash_end) & (date_range >= recovery_start)
last_crash_price = prices[crash_mask][-1]
prices[recovery_mask] = np.linspace(last_crash_price, 500, recovery_mask.sum())

# Day-to-day Gaussian noise (std 5)
prices += np.random.normal(0, 5, num_days)

# Each quote is the path price scaled by its own independent random factor
open_prices = prices * np.random.uniform(0.98, 1.02, num_days)
high_prices = prices * np.random.uniform(1.00, 1.03, num_days)
low_prices = prices * np.random.uniform(0.95, 0.99, num_days)
close_prices = prices * np.random.uniform(0.97, 1.01, num_days)

# The factor ranges overlap, so the raw draws can put High below Open/Close or
# Low above them. Enforce Low <= Open/Close <= High explicitly.
high_prices = np.maximum(high_prices, np.maximum(open_prices, close_prices))
low_prices = np.minimum(low_prices, np.minimum(open_prices, close_prices))

data_crash_recovery = {
    "Date": date_range,
    "Open": open_prices,
    "High": high_prices,
    "Low": low_prices,
    "Close": close_prices,
    "Adj Close": prices * np.random.uniform(0.97, 1.01, num_days),
    "Volume": np.random.randint(500_000, 5_000_000, num_days),
}

# Build the table and round every price column to 2 decimals
stock_data_crash_recovery = pd.DataFrame(data_crash_recovery)
stock_data_crash_recovery = stock_data_crash_recovery.round({
    "Open": 2, "High": 2, "Low": 2, "Close": 2, "Adj Close": 2
})

# Save to CSV inside the Market_Data directory
file_path_crash_recovery = os.path.join(directory, "fictive_stock_data_crash_recovery.csv")
stock_data_crash_recovery.to_csv(file_path_crash_recovery, index=False)

print(f"Dataset generated and saved to {file_path_crash_recovery}")

Dataset generated and saved to Market_Data/fictive_stock_data_crash_recovery.csv


# Upwards Trend + Stock Split

In [19]:
# Synthetic single-stock dataset: upward trend with one 2-for-1 stock split at
# a random date. Saved as CSV.

# Business-day calendar for the simulated period
date_range = pd.date_range(start="2020-01-01", end="2024-11-20", freq='B')
num_days = len(date_range)

# Underlying trend: linear ramp from 50 to 500 plus Gaussian noise (std 3)
prices = np.linspace(50, 500, num_days) + np.random.normal(0, 3, num_days)

# Pick the split date at random, skipping the first 365 entries of the
# calendar. The calendar holds business days only, so that is 365 trading
# days (more than one calendar year).
split_date = np.random.choice(date_range[365:])
split_index = np.where(date_range == split_date)[0][0]

# From the split date onwards the price is halved...
split_factor = 0.5
prices[split_index:] = prices[split_index:] * split_factor

# ...and the traded volume is doubled. The float product is cast back to the
# integer dtype of `volumes` by the slice assignment.
volumes = np.random.randint(500_000, 5_000_000, num_days)
volumes[split_index:] = volumes[split_index:] * (1 / split_factor)

# Each quote is the path price scaled by its own independent random factor
open_prices = prices * np.random.uniform(0.98, 1.02, num_days)
high_prices = prices * np.random.uniform(1.00, 1.03, num_days)
low_prices = prices * np.random.uniform(0.95, 0.99, num_days)
close_prices = prices * np.random.uniform(0.97, 1.01, num_days)

# The factor ranges overlap, so the raw draws can put High below Open/Close or
# Low above them. Enforce Low <= Open/Close <= High explicitly.
high_prices = np.maximum(high_prices, np.maximum(open_prices, close_prices))
low_prices = np.minimum(low_prices, np.minimum(open_prices, close_prices))

data_with_split = {
    "Date": date_range,
    "Open": open_prices,
    "High": high_prices,
    "Low": low_prices,
    "Close": close_prices,
    # NOTE: Adj Close follows the halved price too; it is not split-adjusted.
    "Adj Close": prices * np.random.uniform(0.97, 1.01, num_days),
    "Volume": volumes,
    # 1 before the split, split_factor from the split date onwards
    "Split Factor": np.where(np.arange(num_days) < split_index, 1, split_factor),
}

# Build the table and round every price column to 2 decimals
stock_data_with_split = pd.DataFrame(data_with_split)
stock_data_with_split = stock_data_with_split.round({
    "Open": 2, "High": 2, "Low": 2, "Close": 2, "Adj Close": 2
})

# Save to CSV inside the Market_Data directory
file_path_with_split = os.path.join(directory, "fictive_stock_data_with_split.csv")
stock_data_with_split.to_csv(file_path_with_split, index=False)

print(f"Dataset generated and saved to {file_path_with_split}")

Dataset generated and saved to Market_Data/fictive_stock_data_with_split.csv


# Random Fluctuations

In [24]:
# Synthetic single-stock dataset: a random walk starting at 100. Saved as CSV.

# Business-day calendar for the simulated period
date_range = pd.date_range(start="2020-01-01", end="2024-11-20", freq='B')
num_days = len(date_range)

# Random walk: start at 100 and add a uniform daily change in [-2, 2).
# A cumulative sum over [start, step, step, ...] replaces the Python loop.
daily_changes = np.random.uniform(-2, 2, max(num_days - 1, 0))
prices = np.cumsum(np.concatenate(([100.0], daily_changes)))

# Floor the path at 1 so no price is zero or negative
prices = np.maximum(prices, 1)

# Trading volumes, independent of the price path
volumes = np.random.randint(500_000, 5_000_000, num_days)

# Each quote is the path price scaled by its own independent random factor
open_prices = prices * np.random.uniform(0.98, 1.02, num_days)
high_prices = prices * np.random.uniform(1.00, 1.03, num_days)
low_prices = prices * np.random.uniform(0.95, 0.99, num_days)
close_prices = prices * np.random.uniform(0.97, 1.01, num_days)

# The factor ranges overlap, so the raw draws can put High below Open/Close or
# Low above them. Enforce Low <= Open/Close <= High explicitly.
high_prices = np.maximum(high_prices, np.maximum(open_prices, close_prices))
low_prices = np.minimum(low_prices, np.minimum(open_prices, close_prices))

data_random_fluctuations = {
    "Date": date_range,
    "Open": open_prices,
    "High": high_prices,
    "Low": low_prices,
    "Close": close_prices,
    "Adj Close": prices * np.random.uniform(0.97, 1.01, num_days),
    "Volume": volumes,
}

# Build the table and round every price column to 2 decimals
stock_data_random = pd.DataFrame(data_random_fluctuations)
stock_data_random = stock_data_random.round({
    "Open": 2, "High": 2, "Low": 2, "Close": 2, "Adj Close": 2
})

# Save to CSV inside the Market_Data directory
file_path_random = os.path.join(directory, "fictive_stock_data_random_fluctuations.csv")
stock_data_random.to_csv(file_path_random, index=False)

print(f"Dataset generated and saved to {file_path_random}")

Dataset generated and saved to Market_Data/fictive_stock_data_random_fluctuations.csv


# Data for Multiple Stock Prices

In [26]:
# Business-day calendar shared by every simulated company
date_range = pd.date_range("2020-01-01", "2024-11-20", freq="B")
num_days = date_range.size

# Company identifiers are the integers 1 through 101
companies = [*range(1, 102)]

# Function to generate unique price movements for each company
def generate_price_movement(trend_type, dates=None):
    """Return a synthetic daily price path for one company.

    Args:
        trend_type: One of "upward" (50 -> 500), "downward" (500 -> 50),
            "random" (random walk from 100 with uniform steps in [-2, 2)) or
            "crash" (100 -> 500 with the calendar year 2022 scaled down
            towards 30%).
        dates: Optional pandas DatetimeIndex to generate prices for. When
            omitted, the module-level ``date_range`` is used.

    Returns:
        A 1-D float array with one price per date, floored at 1.

    Raises:
        ValueError: If ``trend_type`` is not one of the supported names.
            (Previously an unknown name silently produced an empty array.)
    """
    if dates is None:
        dates = date_range
    n_days = len(dates)

    if trend_type == "upward":
        prices = np.linspace(50, 500, n_days) + np.random.normal(0, 5, n_days)
    elif trend_type == "downward":
        prices = np.linspace(500, 50, n_days) + np.random.normal(0, 5, n_days)
    elif trend_type == "random":
        # Cumulative sum over [start, step, step, ...] replaces the Python loop
        steps = np.random.uniform(-2, 2, max(n_days - 1, 0))
        prices = np.cumsum(np.concatenate(([100.0], steps)))
    elif trend_type == "crash":
        prices = np.linspace(100, 500, n_days)
        crash_start = pd.Timestamp("2022-01-01")
        crash_end = pd.Timestamp("2022-12-31")
        crash_mask = (dates >= crash_start) & (dates <= crash_end)
        # Scale factor slides from 1.0 to 0.3 across the crash window
        prices[crash_mask] *= np.linspace(1, 0.3, crash_mask.sum())
        prices = prices + np.random.normal(0, 5, n_days)
    else:
        raise ValueError(
            f"Unknown trend_type {trend_type!r}; expected 'upward', "
            "'downward', 'random' or 'crash'"
        )

    # Noise can push a price to zero or below; floor the whole path at 1
    return np.maximum(prices, 1)

# Generate data for all companies.
# Each company gets a randomly chosen trend; its columns are built as whole
# arrays (one DataFrame per company) instead of one dict per row, matching the
# vectorised style of the single-stock datasets.
data = []
for company in companies:
    trend_type = np.random.choice(["upward", "downward", "random", "crash"])
    prices = generate_price_movement(trend_type)
    volumes = np.random.randint(500_000, 5_000_000, num_days)

    # Each quote is the path price scaled by its own independent random factor
    open_prices = prices * np.random.uniform(0.98, 1.02, num_days)
    high_prices = prices * np.random.uniform(1.00, 1.03, num_days)
    low_prices = prices * np.random.uniform(0.95, 0.99, num_days)
    close_prices = prices * np.random.uniform(0.97, 1.01, num_days)

    # The factor ranges overlap, so the raw draws can put High below
    # Open/Close or Low above them. Enforce Low <= Open/Close <= High.
    high_prices = np.maximum(high_prices, np.maximum(open_prices, close_prices))
    low_prices = np.minimum(low_prices, np.minimum(open_prices, close_prices))

    data.append(pd.DataFrame({
        "Company": company,  # scalar, broadcast to every row
        "Date": date_range,
        "Open": open_prices,
        "High": high_prices,
        "Low": low_prices,
        "Close": close_prices,
        "Adj Close": prices * np.random.uniform(0.97, 1.01, num_days),
        "Volume": volumes,
    }))

# Stack the per-company tables into one long table (company-major order)
stock_data_multiple = pd.concat(data, ignore_index=True)
stock_data_multiple = stock_data_multiple.round({
    "Open": 2, "High": 2, "Low": 2, "Close": 2, "Adj Close": 2
})  # Round prices

# Save to CSV inside the Market_Data directory
file_path_multiple = os.path.join(directory, "fictive_stock_data_101_companies.csv")
stock_data_multiple.to_csv(file_path_multiple, index=False)

print(f"Dataset generated and saved to {file_path_multiple}")

Dataset generated and saved to Market_Data/fictive_stock_data_101_companies.csv


# Data for Multiple Stock Prices with Simulated Crash and Correlation Environment

In [29]:
# Business-day calendar shared by every simulated company
date_range = pd.date_range("2020-01-01", "2024-11-20", freq="B")
num_days = date_range.size

# Company identifiers are the integers 1 through 101
companies = [*range(1, 102)]

# The market crash starts on a random business day in the first half of 2022
crash_start = pd.Timestamp("2022-01-01")
crash_end = pd.Timestamp("2022-06-30")
crash_candidates = pd.date_range(crash_start, crash_end, freq="B")
crash_date = np.random.choice(crash_candidates)

# Function to generate price movements with optional correlation
def generate_price_movement(base_prices=None, correlation=None):
    """Return a price path that follows, mirrors, or ignores a market path.

    Args:
        base_prices: Array-like market price path. Required when
            ``correlation`` is "positive" or "negative".
        correlation: "positive" scales each base price by a random factor in
            [0.95, 1.05). "negative" reflects that scaled path around
            ``base.max() + base.min()`` so it moves inversely. Any other value
            (including None) produces an independent random walk from 100.

    Returns:
        A 1-D float array floored at 1. Its length is ``len(base_prices)``
        when a base path is given, otherwise the module-level ``num_days``.

    Raises:
        ValueError: If a correlated path is requested without ``base_prices``.
    """
    if correlation in ("positive", "negative"):
        if base_prices is None:
            raise ValueError(
                f"base_prices is required when correlation={correlation!r}"
            )
        # asarray lets plain lists work with .max()/.min() below
        base = np.asarray(base_prices, dtype=float)
        scaled = base * np.random.uniform(0.95, 1.05, len(base))
        if correlation == "positive":
            prices = scaled
        else:
            prices = base.max() + base.min() - scaled
    else:
        # Independent behavior: vectorised random walk with steps in [-2, 2)
        n_days = num_days if base_prices is None else len(base_prices)
        steps = np.random.uniform(-2, 2, max(n_days - 1, 0))
        prices = np.cumsum(np.concatenate(([100.0], steps)))

    # A single clamp covers every branch: no price below 1
    return np.maximum(prices, 1)

# Market path: linear decline from 500 to 50, scaled to 30% from the crash
# date onwards, plus Gaussian noise (std 5)
base_prices = np.linspace(500, 50, num_days)
crash_index = np.where(date_range == crash_date)[0][0]
base_prices[crash_index:] *= 0.3
base_prices += np.random.normal(0, 5, num_days)

# Generate data for all companies.
# Columns are built as whole arrays (one DataFrame per company) instead of one
# dict per row, matching the vectorised style of the single-stock datasets.
data = []
for company in companies:
    if company % 3 == 0:
        # Positively correlated with the market path
        prices = generate_price_movement(base_prices, correlation="positive")
    elif company % 3 == 1:
        # Negatively correlated with the market path
        prices = generate_price_movement(base_prices, correlation="negative")
    else:
        # Independent behavior
        prices = generate_price_movement()

    volumes = np.random.randint(500_000, 5_000_000, num_days)

    # Each quote is the path price scaled by its own independent random factor
    open_prices = prices * np.random.uniform(0.98, 1.02, num_days)
    high_prices = prices * np.random.uniform(1.00, 1.03, num_days)
    low_prices = prices * np.random.uniform(0.95, 0.99, num_days)
    close_prices = prices * np.random.uniform(0.97, 1.01, num_days)

    # The factor ranges overlap, so the raw draws can put High below
    # Open/Close or Low above them. Enforce Low <= Open/Close <= High.
    high_prices = np.maximum(high_prices, np.maximum(open_prices, close_prices))
    low_prices = np.minimum(low_prices, np.minimum(open_prices, close_prices))

    data.append(pd.DataFrame({
        "Company": company,  # scalar, broadcast to every row
        "Date": date_range,
        "Open": open_prices,
        "High": high_prices,
        "Low": low_prices,
        "Close": close_prices,
        "Adj Close": prices * np.random.uniform(0.97, 1.01, num_days),
        "Volume": volumes,
    }))

# Stack the per-company tables into one long table (company-major order)
stock_data_correlated = pd.concat(data, ignore_index=True)
stock_data_correlated = stock_data_correlated.round({
    "Open": 2, "High": 2, "Low": 2, "Close": 2, "Adj Close": 2
})  # Round prices

# Save to CSV inside the Market_Data directory
file_path_correlated = os.path.join(directory, "fictive_stock_data_101_companies_correlated.csv")
stock_data_correlated.to_csv(file_path_correlated, index=False)

print(f"Dataset generated and saved to {file_path_correlated}")

Dataset generated and saved to Market_Data/fictive_stock_data_101_companies_correlated.csv


# Data for Multiple Stock Prices with Simulated Crash and Correlation Environment
Companies are represented on the horizontal axis (one column per company).

In [37]:
# Simulated period, business days only
date_range = pd.date_range(freq="B", start="2020-01-01", end="2024-11-20")
num_days = date_range.shape[0]

# 101 companies, identified by the integers 1..101
companies = list(range(1, 101 + 1))

# Draw the crash date from the business days between the two bounds below
crash_start, crash_end = pd.Timestamp("2022-01-01"), pd.Timestamp("2022-06-30")
possible_crash_days = pd.date_range(crash_start, crash_end, freq="B")
crash_date = np.random.choice(possible_crash_days)

# Function to generate price movements with optional correlation
def generate_price_movement(base_prices=None, correlation=None):
    """Generate one company's price path relative to a market path.

    Args:
        base_prices: Array-like market price path. Must be supplied when
            ``correlation`` is "positive" or "negative".
        correlation: "positive" multiplies each base price by a random factor
            in [0.95, 1.05). "negative" subtracts that scaled path from
            ``base.max() + base.min()`` so it moves against the market. Any
            other value (including None) yields an independent random walk
            that starts at 100 with uniform steps in [-2, 2).

    Returns:
        A 1-D float array with every value at least 1. It has
        ``len(base_prices)`` entries when a base path is passed and the
        module-level ``num_days`` entries otherwise.

    Raises:
        ValueError: If ``correlation`` is "positive" or "negative" but
            ``base_prices`` is None.
    """
    wants_correlation = correlation in ("positive", "negative")
    if wants_correlation and base_prices is None:
        raise ValueError(
            f"base_prices is required when correlation={correlation!r}"
        )

    if not wants_correlation:
        # Independent behavior: cumulative sum replaces the Python loop
        n_days = num_days if base_prices is None else len(base_prices)
        steps = np.random.uniform(-2, 2, max(n_days - 1, 0))
        return np.maximum(np.cumsum(np.concatenate(([100.0], steps))), 1)

    # asarray lets plain lists work with .max()/.min() below
    base = np.asarray(base_prices, dtype=float)
    scaled = base * np.random.uniform(0.95, 1.05, len(base))
    if correlation == "negative":
        scaled = base.max() + base.min() - scaled

    # Ensure no price falls below 1
    return np.maximum(scaled, 1)

# Market path: start from a linear decline (500 down to 50), scale everything
# from the crash date onwards to 30%, then add Gaussian noise (std 5)
base_prices = np.linspace(500, 50, num_days)
crash_index = np.where(date_range == crash_date)[0][0]
base_prices[crash_index:] = base_prices[crash_index:] * 0.3
base_prices = base_prices + np.random.normal(0, 5, num_days)

# The company id modulo 3 decides the relation to the market path:
# remainder 0 -> positive, remainder 1 -> negative, remainder 2 -> independent
correlation_by_remainder = {0: "positive", 1: "negative"}

company_data = {}
for company in companies:
    relation = correlation_by_remainder.get(company % 3)
    if relation is None:
        company_data[company] = generate_price_movement()
    else:
        company_data[company] = generate_price_movement(base_prices, correlation=relation)

# Wide table: one row per date, one column per company
stock_data_transposed = pd.DataFrame(company_data, index=date_range)
stock_data_transposed.index.name = "Date"

# Write the table (including the Date index) into the Market_Data directory
file_path_transposed = os.path.join(directory, "fictive_stock_data_101_correlated_sorted.csv")
stock_data_transposed.to_csv(file_path_transposed)

print(f"Dataset generated and saved to {file_path_transposed}")

Dataset generated and saved to Market_Data/fictive_stock_data_101_correlated_sorted.csv
