In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression, SGDRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
import numpy as np

In [2]:
# 1. Load the data from 'out.csv'
try:
    df = pd.read_csv('out.csv')
    print("Data loaded successfully from out.csv")

    # 2. Check for "Unnamed" columns and drop them
    unnamed_cols = [col for col in df.columns if col.startswith("Unnamed")]
    if unnamed_cols:
        df.drop(columns=unnamed_cols, inplace=True)
        print(f"Dropped 'Unnamed' columns: {unnamed_cols}")
    else:
        print("No 'Unnamed' columns found.")

except FileNotFoundError:
    print("Error: out.csv not found. Please make sure the file is in the correct directory.")
    exit()

Data loaded successfully from out.csv
Dropped 'Unnamed' columns: ['Unnamed: 0']


In [3]:
# Display the first few rows and info to understand the data
print("\nFirst few rows of the DataFrame:")
print(df.head())
print("\nInformation about the DataFrame:")
print(df.info())


First few rows of the DataFrame:
   cam_id                                cam_pos_3d  cam_2d_direct  \
0  Camera  [5.399700561023001, -23.596222132577378]     321.855741   
1  Camera  [5.399700561023001, -23.596222132577378]     321.855741   
2  Camera  [5.399700561023001, -23.596222132577378]     321.855741   
3  Camera  [5.399700561023001, -23.596222132577378]     321.855741   
4  Camera  [5.399700561023001, -23.596222132577378]     321.855741   

                                       cam_3d_direct  cam_frameWidth  \
0  [77.27004596536257, 37.453704140273956, 7.8223...            1920   
1  [77.27004596536257, 37.453704140273956, 7.8223...            1920   
2  [77.27004596536257, 37.453704140273956, 7.8223...            1920   
3  [77.27004596536257, 37.453704140273956, 7.8223...            1920   
4  [77.27004596536257, 37.453704140273956, 7.8223...            1920   

   cam_frameHeight  obj_id   2d_visible                              3d_loc  
0             1080      38  [1914,

In [4]:
# 2. Function to safely parse string lists into numpy arrays
def parse_list_string(list_str):
    try:
        return np.fromstring(list_str.strip('[]'), sep=',')
    except:
        return np.nan  # Handle potential parsing errors

In [5]:
# 3. Apply the parsing function to the list-like columns
cols_to_parse = ['cam_pos_3d', 'cam_3d_direct' ,'3d_loc','2d_visible']
for col in cols_to_parse:
    if col in df.columns:
        df[col] = df[col].apply(parse_list_string)
    else:
        print(f"Warning: Column '{col}' not found in the CSV.")

In [6]:
# 4. Expand the parsed columns into individual features
def expand_coordinate_columns(df, col_name, num_dims, new_col_prefix):
    if col_name in df.columns and df[col_name].iloc[0] is not np.nan:
        try:
            expanded_data = pd.DataFrame(df[col_name].tolist(), index=df.index)
            new_cols = [f'{new_col_prefix}_{i+1}' for i in range(num_dims)]
            if expanded_data.shape[1] == num_dims:
                df[new_cols] = expanded_data
                df.drop(columns=[col_name], inplace=True)
                print(f"Column '{col_name}' expanded into: {new_cols}")
            else:
                print(f"Warning: Column '{col_name}' does not consistently have {num_dims} dimensions.")
        except Exception as e:
            print(f"Error expanding column '{col_name}': {e}")
    elif col_name not in df.columns:
        print(f"Warning: Column '{col_name}' not found for expansion.")
    else:
        print(f"Warning: Column '{col_name}' contains NaN values and cannot be reliably expanded.")

expand_coordinate_columns(df, 'cam_pos_3d', 2, 'cam_pos')
expand_coordinate_columns(df, 'cam_3d_direct', 3, 'cam_3d_direct')
expand_coordinate_columns(df, '2d_visible', 2, '2d_visible')
expand_coordinate_columns(df, '3d_loc', 3, '3d_loc')

# Display the DataFrame after expansion
print("\nDataFrame after expanding coordinate columns:")
print(df.head())

Column 'cam_pos_3d' expanded into: ['cam_pos_1', 'cam_pos_2']
Column 'cam_3d_direct' expanded into: ['cam_3d_direct_1', 'cam_3d_direct_2', 'cam_3d_direct_3']
Column '2d_visible' expanded into: ['2d_visible_1', '2d_visible_2']
Column '3d_loc' expanded into: ['3d_loc_1', '3d_loc_2', '3d_loc_3']

DataFrame after expanding coordinate columns:
   cam_id  cam_2d_direct  cam_frameWidth  cam_frameHeight  obj_id  cam_pos_1  \
0  Camera     321.855741            1920             1080      38   5.399701   
1  Camera     321.855741            1920             1080       2   5.399701   
2  Camera     321.855741            1920             1080       0   5.399701   
3  Camera     321.855741            1920             1080      41   5.399701   
4  Camera     321.855741            1920             1080      52   5.399701   

   cam_pos_2  cam_3d_direct_1  cam_3d_direct_2  cam_3d_direct_3  2d_visible_1  \
0 -23.596222        77.270046        37.453704         7.822322        1914.0   
1 -23.596222    

In [7]:
print("Rows before drop: ", len(df))
df.drop_duplicates(inplace=True)
print("Rows after drop: ", len(df))

Rows before drop:  6965450
Rows after drop:  6584833


In [8]:
# 5. Define features (X) and target (y)
if all(col in df.columns for col in ['3d_loc_1', '3d_loc_2', '3d_loc_3']):
    X = df.drop(columns=[col for col in df.columns if col.startswith('3d_loc')])
    y = df[['3d_loc_1', '3d_loc_2', '3d_loc_3']]
else:
    print("Error: Target columns ('3d_loc_1', '3d_loc_2', '3d_loc_3') not found. Please check the data.")
    exit()

In [9]:
# 6. Identify categorical features to drop
categorical_features_to_drop = ['cam_id', 'obj_id', 'cam_frameWidth', 'cam_frameHeight']

In [10]:
# 7. Drop the specified categorical features from the feature set (X)
X_dropped = X.drop(columns=categorical_features_to_drop, errors='ignore')
print(f"\nFeatures (X) after dropping categorical columns '{categorical_features_to_drop}':")
print(X_dropped.head())


Features (X) after dropping categorical columns '['cam_id', 'obj_id', 'cam_frameWidth', 'cam_frameHeight']':
   cam_2d_direct  cam_pos_1  cam_pos_2  cam_3d_direct_1  cam_3d_direct_2  \
0     321.855741   5.399701 -23.596222        77.270046        37.453704   
1     321.855741   5.399701 -23.596222        77.270046        37.453704   
2     321.855741   5.399701 -23.596222        77.270046        37.453704   
3     321.855741   5.399701 -23.596222        77.270046        37.453704   
4     321.855741   5.399701 -23.596222        77.270046        37.453704   

   cam_3d_direct_3  2d_visible_1  2d_visible_2  
0         7.822322        1914.0         487.0  
1         7.822322          60.0         523.0  
2         7.822322         750.0         574.0  
3         7.822322         751.0         447.0  
4         7.822322         290.0         432.0  


In [11]:
print("Target:\n", y)

Target:
           3d_loc_1   3d_loc_2  3d_loc_3
0         2.589343  10.800000  1.075566
1       -23.616542 -12.032302  0.314792
2       -10.861312  -7.774997  0.314778
3       -23.511425   4.535074  0.133567
4       -25.224881  -6.948549  1.413329
...            ...        ...       ...
6965445  -5.528662  25.647364  0.944578
6965446 -12.147794  19.956714  0.954796
6965447   1.552796   8.275697  0.970113
6965448  -7.900477  20.148492  0.807954
6965449   1.499146  20.221157  0.971087

[6584833 rows x 3 columns]


In [12]:
# 8. Identify numerical features (now all remaining columns in X_dropped)
numerical_features = X_dropped.columns.tolist()

# 9. Create preprocessing pipeline for numerical features only
numerical_transformer = StandardScaler()

In [13]:
# 10. Create a ColumnTransformer that only applies the numerical transformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features)],
    remainder='passthrough'  # Keep other columns (if any) untouched
)

In [14]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor

# 9. Define the machine learning model
#model = LinearRegression()
model = LinearRegression()

# 9. Define the multi-target regression model
base_regressor = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
model = MultiOutputRegressor(base_regressor)

# 10. Create a pipeline
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('regressor', model)])

# 11. Split the data into training and a temporary set (80% train, 20% temp)
X_train, X_temp, y_train, y_temp = train_test_split(X_dropped, y, test_size=0.2, random_state=42)

# 12. Split the temporary set into validation and test sets (50% valid, 50% test of the temp, so 10% each of the original)
X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42)

print(f"\nTraining set size: {len(X_train)}")
print(f"Validation set size: {len(X_valid)}")
print(f"Test set size: {len(X_test)}")


Training set size: 5267866
Validation set size: 1053573
Test set size: 263394


In [15]:
X_train

Unnamed: 0,cam_2d_direct,cam_pos_1,cam_pos_2,cam_3d_direct_1,cam_3d_direct_2,cam_3d_direct_3,2d_visible_1,2d_visible_2
642328,46.410130,-25.753586,1.049256,76.691313,-45.630760,-9.597773,1542.0,333.0
5076388,178.902366,-7.517267,21.359837,-80.520326,-1.082648,-179.819233,1295.0,341.0
1021105,205.164013,-15.698328,0.164730,-79.902705,24.821526,175.724743,1783.0,542.0
5184437,170.787071,-23.950587,21.717433,-80.672051,-9.093153,-178.512980,480.0,399.0
5793648,178.902366,-12.511599,21.063986,-80.520326,-1.082648,-179.819233,245.0,463.0
...,...,...,...,...,...,...,...,...
4567267,178.902366,-12.511599,21.063986,-80.520326,-1.082648,-179.819233,1071.0,433.0
1809199,46.410130,-13.398131,-21.992243,76.691313,-45.630760,-9.597773,21.0,375.0
6929602,321.855741,5.399701,-23.596222,77.270046,37.453704,7.822322,682.0,500.0
6796181,23.349311,-26.236181,-5.955736,87.609887,-23.331171,-0.947057,1037.0,746.0


In [16]:
y_train

Unnamed: 0,3d_loc_1,3d_loc_2,3d_loc_3
642328,-7.463885,9.269132,0.930066
5076388,-13.720749,-12.829933,0.931640
1021105,-25.224881,-6.948549,1.413329
5184437,-9.710749,-8.775383,0.442647
5793648,-7.616770,10.371222,0.932380
...,...,...,...
4567267,-13.394169,2.800493,0.966683
1809199,-4.791188,6.342878,0.870579
6929602,-13.898756,-6.362843,0.954697
6796181,-20.107022,6.703674,0.913194


In [17]:
%%time
# 13. Train the model
pipeline.fit(X_train, y_train)

CPU times: user 691 ms, sys: 204 ms, total: 896 ms
Wall time: 683 ms


In [18]:
# 14. Make predictions on the validation set (for evaluation during development)
y_valid_pred = pipeline.predict(X_valid)

# 15. Evaluate the model on the validation set
mse_valid = mean_squared_error(y_valid, y_valid_pred)
print(f'\nMean Squared Error on the validation set: {mse_valid}')

# You can now use the validation set to tune hyperparameters, compare models, etc.
# Once you are satisfied with your model, you would evaluate it on the test set.

# To evaluate on the test set (after final model selection):
y_test_pred = pipeline.predict(X_test)
mse_test = mean_squared_error(y_test, y_test_pred)
print(f'\nMean Squared Error on the test set: {mse_test}')


Mean Squared Error on the validation set: 41.62695569055053

Mean Squared Error on the test set: 41.703718139915104


In [18]:

# 8. Define a PyTorch MLP model for 2D -> 3D prediction
import torch
import torch.nn as nn

class BetterTwoDtoThreeDModel(nn.Module):
    def __init__(self, input_dim, output_dim=3):
        super(BetterTwoDtoThreeDModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64, output_dim)
        )

    def forward(self, x):
        return self.model(x)


In [19]:

# 9. Prepare the data for PyTorch
import numpy as np

X_np = X_dropped.to_numpy().astype(np.float32)
y_np = y.to_numpy().astype(np.float32)

X_train_tensor = torch.tensor(X_np)
y_train_tensor = torch.tensor(y_np)

input_dim = X_train_tensor.shape[1]


In [None]:
# 11. Split the data and validate the model
from sklearn.model_selection import train_test_split

# Split into training and validation sets
X_train_np, X_val_np, y_train_np, y_val_np = train_test_split(X_np, y_np, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train_np)
y_train_tensor = torch.tensor(y_train_np)
X_val_tensor = torch.tensor(X_val_np)
y_val_tensor = torch.tensor(y_val_np)

In [21]:
# MODEL TRAINING

criterion = nn.MSELoss()

# Reinitialize the model for clean training
model = TwoDtoThreeDModel(input_dim=input_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

n_epochs = 300

model.train()
# Training with validation
for epoch in range(n_epochs):
    optimizer.zero_grad()
    preds = model(X_train_tensor)
    loss = criterion(preds, y_train_tensor)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0 or epoch == n_epochs - 1:
        print(f"Epoch {epoch}, Train Loss: {loss.item():.4f}")
        
print("Training done!")

Epoch 0, Train Loss: 4294.5752
Epoch 10, Train Loss: 665.5290
Epoch 20, Train Loss: 92.2996
Epoch 30, Train Loss: 144.5048
Epoch 40, Train Loss: 63.6807
Epoch 50, Train Loss: 68.5091
Epoch 60, Train Loss: 56.2084
Epoch 70, Train Loss: 55.0299
Epoch 80, Train Loss: 52.6862
Epoch 90, Train Loss: 50.4224
Epoch 99, Train Loss: 48.7231
Training done!


In [22]:
# Validation
model.eval()
with torch.no_grad():
    val_preds = model(X_val_tensor)
    val_loss = criterion(val_preds, y_val_tensor)
print(f"Val Loss: {val_loss.item():.4f}")

Val Loss: 48.4948
