In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the Kaggle input
# directory, then print one path per line in os.walk order.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 1. Beamforming data

In [2]:
import numpy as np
import pandas as pd

def generate_beamforming_dataset(num_samples, num_beams=8, history_length=5, seed=None):
    """
    Generates a synthetic dataset for beamforming optimization.

    Every feature column is drawn independently at random; only the target
    column ``optimal_beam_index`` is derived from other columns (the CSI gains
    and the most recent x coordinate).

    Columns, in order:
        sample_id                          0 .. num_samples - 1
        user_loc_hist_x_{i}, user_loc_hist_y_{i}
                                           uniform in [-100, 100), i < history_length
        csi_beam_{j}                       uniform in [0.1, 10.0), j < num_beams
        beam_hist_{i}                      random integer in [0, num_beams), i < history_length
        sinr                               uniform in [5, 30) (treated as dB)
        optimal_beam_index                 target, see below

    Target rule: the beam with the highest CSI gain. If the most recent x
    coordinate is greater than 50 and that beam is not the last one, the index
    is shifted up by 0 or 1 (chosen at random) as an arbitrary, purely
    illustrative "location" influence.

    Args:
        num_samples (int): The number of data samples to generate (>= 0).
        num_beams (int): The total number of available beam directions (>= 1).
        history_length (int): The length of user location and beamforming
            history (>= 0). With 0, no history columns are produced and the
            location-based shift is skipped.
        seed (int | None): Optional seed for reproducible output. When None
            (the default) values are drawn from NumPy's global random state,
            so a prior ``np.random.seed(...)`` call is still honoured.

    Returns:
        pd.DataFrame: A DataFrame containing the generated dataset.

    Raises:
        ValueError: If num_samples < 0, num_beams < 1 or history_length < 0.
    """
    if num_samples < 0:
        raise ValueError("num_samples must be non-negative")
    if num_beams < 1:
        raise ValueError("num_beams must be at least 1")
    if history_length < 0:
        raise ValueError("history_length must be non-negative")

    # The np.random module and a RandomState instance expose the same
    # uniform/randint API, so either can serve as the source of randomness.
    rng = np.random if seed is None else np.random.RandomState(seed)

    data = {
        'sample_id': np.arange(num_samples)
    }

    # 1. User Location History (2D coordinates)
    # Each sample has a history_length of (x, y) coordinates
    for i in range(history_length):
        data[f'user_loc_hist_x_{i}'] = rng.uniform(-100, 100, num_samples)
        data[f'user_loc_hist_y_{i}'] = rng.uniform(-100, 100, num_samples)

    # 2. Channel State Information (CSI)
    # Simplified: Vector of channel gains/strengths towards each beam direction
    for i in range(num_beams):
        data[f'csi_beam_{i}'] = rng.uniform(0.1, 10.0, num_samples)

    # 3. Beamforming History
    # A sequence of previously chosen beam indices (0 to num_beams-1)
    for i in range(history_length):
        data[f'beam_hist_{i}'] = rng.randint(0, num_beams, num_samples)

    # 4. SINR (Signal-to-Interference-plus-Noise Ratio)
    # Drawn independently of every other column.
    data['sinr'] = rng.uniform(5, 30, num_samples)  # dB values

    # 5. Optimal Beam Index (Target Variable)
    # Synthetic rule: start from the beam with the highest CSI gain.
    # Stack the per-beam columns into a (num_samples, num_beams) matrix so the
    # arg-max is computed for all samples at once.
    csi_matrix = np.column_stack(
        [data[f'csi_beam_{j}'] for j in range(num_beams)]
    )
    optimal_beam = np.argmax(csi_matrix, axis=1)

    # Location influence (simplistic and arbitrary, for demonstration only):
    # when the latest x coordinate is > 50, nudge the beam index up by 0 or 1.
    # Only possible when there is at least one history step to read from.
    if history_length > 0:
        last_x = data[f'user_loc_hist_x_{history_length - 1}']
        # Excluding the last beam keeps the shifted index within range, so no
        # modulo wrap-around is needed.
        shiftable = (last_x > 50) & (optimal_beam < num_beams - 1)
        optimal_beam[shiftable] += rng.randint(0, 2, int(shiftable.sum()))

    data['optimal_beam_index'] = optimal_beam

    df = pd.DataFrame(data)
    return df

# Seed NumPy's global random state so that a fresh "Restart & Run All"
# regenerates exactly the same dataset (and therefore the same CSV).
SEED = 42
np.random.seed(SEED)

# Generate a dataset with 10000 samples, 8 beams, and 5 steps in history
num_samples = 10000
num_beams = 8
history_length = 5
dataset = generate_beamforming_dataset(num_samples, num_beams, history_length)

# Quick sanity summary: size, first rows, dtypes, and target class balance.
print(f"Generated dataset with {len(dataset)} samples and {num_beams} beams.")
print(dataset.head())
print("\nDataset Info:")
dataset.info()
print("\nDistribution of Optimal Beam Index:")
print(dataset['optimal_beam_index'].value_counts(normalize=True))


# Persist to the working directory; the index is dropped because
# 'sample_id' already identifies each row.
dataset.to_csv('beamforming_dataset.csv', index=False)

Generated dataset with 10000 samples and 8 beams.
   sample_id  user_loc_hist_x_0  user_loc_hist_y_0  user_loc_hist_x_1  \
0          0         -32.719914         -24.576819          -0.138887   
1          1         -66.490653          12.217557         -37.051430   
2          2         -64.469566         -15.048229          98.226190   
3          3         -22.062350          11.358396          51.541246   
4          4         -51.302555         -41.380892          59.254136   

   user_loc_hist_y_1  user_loc_hist_x_2  user_loc_hist_y_2  user_loc_hist_x_3  \
0          40.864998         -79.422624          26.884619          41.201659   
1           7.877753          -1.649637          58.587783          99.952816   
2          99.962525         -28.028418          68.916598         -26.898801   
3          21.169529         -92.317232          12.060755          84.083556   
4          13.179588         -36.567350          43.488133          92.987435   

   user_loc_hist_y_3  us