In [2]:
import pandas as pd
import numpy as np
import os


## Synthetic DB creation

## Predictions DB

In [None]:
# Fixed seed so the synthetic predictions table is reproducible.
np.random.seed(42)

num_rows = 10
num_tasks = 25

# Row identifiers: 1..num_rows
id_column = list(range(1, num_rows + 1))

# One binary (0/1) prediction vector per task, drawn in task order T1..T{num_tasks}
t_columns = {
    f'T{task}': np.random.randint(0, 2, size=num_rows)
    for task in range(1, num_tasks + 1)
}

# Binary class label, drawn after every task column
class_column = np.random.randint(0, 2, size=num_rows)

# Assemble the table: Id first, then the task columns, then the label
data = {'Id': id_column, **t_columns, 'class': class_column}
df = pd.DataFrame(data)

# Persist without the pandas index column
df.to_csv('binary_data.csv', index=False)

# Show the resulting table
df

Unnamed: 0,Id,T1,T2,T3,T4,T5,T6,T7,T8,T9,...,T17,T18,T19,T20,T21,T22,T23,T24,T25,class
0,1,0,0,1,0,0,0,0,1,0,...,0,1,0,0,0,0,0,1,1,1
1,2,1,0,0,0,0,1,0,1,1,...,0,0,0,0,1,0,0,1,0,0
2,3,0,0,1,1,1,0,0,0,0,...,1,0,1,0,0,0,1,1,1,1
3,4,0,0,1,1,1,1,0,1,1,...,1,1,0,0,0,0,1,1,0,0
4,5,0,1,1,1,1,0,0,0,0,...,1,1,0,0,1,0,1,1,0,0
5,6,1,0,1,0,1,1,1,1,0,...,0,1,0,1,0,0,1,1,1,0
6,7,0,1,1,1,1,1,1,1,1,...,1,0,1,1,1,1,0,1,0,0
7,8,0,1,1,0,0,0,0,1,0,...,0,0,0,1,1,0,1,1,0,1
8,9,0,1,1,0,1,0,1,0,1,...,0,0,0,0,1,1,0,0,0,1
9,10,1,0,1,0,1,0,1,1,1,...,1,0,1,0,0,0,0,1,0,0


## Confidence degree DB

In [6]:
# Parameters (fixed seed keeps the synthetic confidence table reproducible)
np.random.seed(42)
num_rows = 10
num_tasks = 25

# Row identifiers: 1..num_rows
id_column = list(range(1, num_rows + 1))

# For every task draw one uniform value in [0, 1) per row and store it together
# with its complement, both rounded to 3 decimals (Cd0_T<i> / Cd1_T<i>).
cd_columns = {}
for task in range(1, num_tasks + 1):
    raw_confidence = np.random.random(size=num_rows)
    cd_columns[f'Cd0_T{task}'] = raw_confidence.round(3)
    cd_columns[f'Cd1_T{task}'] = (1 - raw_confidence).round(3)

# Binary class label, drawn after all confidence columns
class_column = np.random.randint(0, 2, size=num_rows)

# Assemble the table: Id first, then the confidence pairs, then the label
data = {'Id': id_column, **cd_columns, 'class': class_column}
df = pd.DataFrame(data)

# Persist without the pandas index column
df.to_csv('confidence_degrees.csv', index=False)

# Show the resulting table
df

Unnamed: 0,Id,Cd0_T1,Cd1_T1,Cd0_T2,Cd1_T2,Cd0_T3,Cd1_T3,Cd0_T4,Cd1_T4,Cd0_T5,...,Cd1_T21,Cd0_T22,Cd1_T22,Cd0_T23,Cd1_T23,Cd0_T24,Cd1_T24,Cd0_T25,Cd1_T25,class
0,1,0.375,0.625,0.021,0.979,0.612,0.388,0.608,0.392,0.122,...,0.358,0.549,0.451,0.658,0.342,0.795,0.205,0.94,0.06,0
1,2,0.951,0.049,0.97,0.03,0.139,0.861,0.171,0.829,0.495,...,0.916,0.692,0.308,0.568,0.432,0.503,0.497,0.954,0.046,0
2,3,0.732,0.268,0.832,0.168,0.292,0.708,0.065,0.935,0.034,...,0.838,0.652,0.348,0.094,0.906,0.577,0.423,0.915,0.085,0
3,4,0.599,0.401,0.212,0.788,0.366,0.634,0.949,0.051,0.909,...,0.101,0.224,0.776,0.368,0.632,0.493,0.507,0.37,0.63,1
4,5,0.156,0.844,0.182,0.818,0.456,0.544,0.966,0.034,0.259,...,0.394,0.712,0.288,0.265,0.735,0.195,0.805,0.015,0.985,0
5,6,0.156,0.844,0.183,0.817,0.785,0.215,0.808,0.192,0.663,...,0.991,0.237,0.763,0.244,0.756,0.722,0.278,0.928,0.072,0
6,7,0.058,0.942,0.304,0.696,0.2,0.8,0.305,0.695,0.312,...,0.899,0.325,0.675,0.973,0.027,0.281,0.719,0.428,0.572,1
7,8,0.866,0.134,0.525,0.475,0.514,0.486,0.098,0.902,0.52,...,0.336,0.746,0.254,0.393,0.607,0.024,0.976,0.967,0.033,0
8,9,0.601,0.399,0.432,0.568,0.592,0.408,0.684,0.316,0.547,...,0.995,0.65,0.35,0.892,0.108,0.645,0.355,0.964,0.036,1
9,10,0.708,0.292,0.291,0.709,0.046,0.954,0.44,0.56,0.185,...,0.839,0.849,0.151,0.631,0.369,0.177,0.823,0.853,0.147,1


## Feature vector CSV files (one per task)

In [3]:
def create_feature_vector_csv(num_rows, num_features, file_number,
                              output_dir='Feature_vector_file'):
    """Generate one synthetic feature-vector table and write it to a CSV file.

    The table has an ``Id`` column (1..num_rows), ``num_features`` columns
    ``f1``..``f<num_features>`` holding uniform random values in [0, 1)
    rounded to 3 decimals, and a binary ``class`` column. Values come from
    NumPy's global random state, so call ``np.random.seed`` beforehand for
    reproducible output.

    Parameters
    ----------
    num_rows : int
        Number of rows in the table.
    num_features : int
        Number of feature columns.
    file_number : int
        Suffix used in the file name ``feature_vector_<file_number>.csv``.
    output_dir : str, optional
        Directory the CSV is written to. It is created if it does not exist.
        Defaults to ``'Feature_vector_file'``.

    Returns
    -------
    str
        The bare file name (without the directory) of the CSV that was written.
    """
    # Create Id column
    data = {'Id': list(range(1, num_rows + 1))}

    # Create feature columns with random values between 0 and 1
    for i in range(1, num_features + 1):
        data[f'f{i}'] = np.random.random(size=num_rows).round(3)

    # Create class column (binary); drawn after the features to keep the
    # random stream order stable
    data['class'] = np.random.randint(0, 2, size=num_rows)

    # Create DataFrame
    df = pd.DataFrame(data)

    # Make sure the target folder exists so the function works on its own,
    # without relying on the caller to have created it first.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save to CSV in the target folder, without the pandas index column
    filename = f'feature_vector_{file_number}.csv'
    filepath = os.path.join(output_dir, filename)
    df.to_csv(filepath, index=False)

    return filename

# Set parameters
np.random.seed(42)  # For reproducibility
num_files = 5       # Number of CSV files to generate
num_rows = 10       # Number of rows in each CSV
num_features = 10   # Number of features in each CSV

# Create the output directory; exist_ok makes the cell safe to re-run
os.makedirs('Feature_vector_file', exist_ok=True)

# Generate the CSV files, keeping their names in creation order
generated_files = [
    create_feature_vector_csv(num_rows, num_features, i)
    for i in range(1, num_files + 1)
]

# Preview each generated file. Captions go through print(): display() on a
# plain str renders its quoted repr, so the leading "\n" would show up
# literally instead of producing a blank line.
for file in generated_files:
    df = pd.read_csv(os.path.join('Feature_vector_file', file))
    print(f"\nFirst 3 rows of {file}:")
    display(df.head(3))
    print(f"Shape: {df.shape}")

'\nFirst 3 rows of feature_vector_1.csv:'

Unnamed: 0,Id,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,class
0,1,0.375,0.021,0.612,0.608,0.122,0.97,0.389,0.772,0.863,0.12,0
1,2,0.951,0.97,0.139,0.171,0.495,0.775,0.271,0.199,0.623,0.713,1
2,3,0.732,0.832,0.292,0.065,0.034,0.939,0.829,0.006,0.331,0.761,0


'Shape: (10, 12)'

'\nFirst 3 rows of feature_vector_2.csv:'

Unnamed: 0,Id,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,class
0,1,0.249,0.871,0.427,0.323,0.037,0.242,0.835,0.174,0.66,0.349,0
1,2,0.41,0.804,0.818,0.519,0.61,0.672,0.321,0.691,0.817,0.726,1
2,3,0.756,0.187,0.861,0.703,0.503,0.762,0.187,0.387,0.555,0.897,1


'Shape: (10, 12)'

'\nFirst 3 rows of feature_vector_3.csv:'

Unnamed: 0,Id,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,class
0,1,0.549,0.658,0.795,0.94,0.294,0.615,0.809,0.89,0.031,0.052,1
1,2,0.692,0.568,0.503,0.954,0.385,0.99,0.81,0.338,0.037,0.531,1
2,3,0.652,0.094,0.577,0.915,0.851,0.14,0.867,0.376,0.823,0.541,0


'Shape: (10, 12)'

'\nFirst 3 rows of feature_vector_4.csv:'

Unnamed: 0,Id,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,class
0,1,0.696,0.738,0.041,0.616,0.642,0.386,0.319,0.803,0.931,0.896,0
1,2,0.409,0.554,0.855,0.635,0.027,0.961,0.845,0.282,0.858,0.389,0
2,3,0.173,0.612,0.704,0.045,0.586,0.905,0.023,0.177,0.429,0.011,0


'Shape: (10, 12)'

'\nFirst 3 rows of feature_vector_5.csv:'

Unnamed: 0,Id,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,class
0,1,0.792,0.143,0.085,0.777,0.118,0.012,0.629,0.456,0.698,0.613,0
1,2,0.79,0.762,0.987,0.558,0.649,0.97,0.696,0.62,0.536,0.418,1
2,3,0.091,0.618,0.374,0.424,0.746,0.043,0.455,0.277,0.31,0.933,0


'Shape: (10, 12)'