In [5]:
import pandas as pd

# Open the source workbook once so every sheet is read from the same handle.
file_path = './Delta_cell_area.xlsx'
excel_file = pd.ExcelFile(file_path)

# Sheet name -> movie number stamped into the row appended to that sheet.
movie_number_by_sheet = {'Mov. 1': 1, 'Mov. 2': 2, 'Mov. 3': 3}

# Sheets that received the extra row, keyed by sheet name.
modified_sheets = {}

for sheet_name in excel_file.sheet_names:
    df = pd.read_excel(excel_file, sheet_name=sheet_name)

    # Sheets outside the known set are not carried over to the output file.
    if sheet_name not in movie_number_by_sheet:
        continue
    number = movie_number_by_sheet[sheet_name]

    # Row layout: two empty cells, the label "Movie" in the 3rd cell, then the
    # movie number repeated across every remaining column.
    marker_cells = [None, None, "Movie"]
    marker_cells.extend([number] * (df.shape[1] - 3))
    new_row = pd.DataFrame([marker_cells], columns=df.columns)

    # The marker row goes at the bottom; the row index is renumbered.
    df = pd.concat([df, new_row], ignore_index=True)
    modified_sheets[sheet_name] = df

# Write every modified sheet into a fresh workbook, without the row index.
output_file_path = './Delta_cell_area_modified_with_movie_row.xlsx'
with pd.ExcelWriter(output_file_path) as writer:
    for sheet_name, df in modified_sheets.items():
        df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Modified Excel file has been saved to {output_file_path}")



Modified Excel file has been saved to ./Delta_cell_area_modified_with_movie_row.xlsx


In [7]:
import pandas as pd
import numpy as np

# Load the data from the provided Excel file.
# sheet_name=None makes read_excel return a dict mapping each sheet name to
# its DataFrame, so `data` is a dict here, not a single frame.
file_path = './Delta_cell_area_modified_with_movie_row.xlsx'
data = pd.read_excel(file_path, sheet_name=None)

# Function to randomly impute NaN and empty values within the upper and lower limits of each column, ignoring the first row
def random_impute(data_dict, seed=None):
    """Fill missing cells of every sheet with random, column-plausible values.

    The first row of each sheet is never read for statistics and never
    modified. For every other row:

    * numeric columns: each NaN is replaced by a draw from
      uniform(min, max) of the column's observed values;
    * numeric columns with no observed value at all: every cell is replaced
      by a draw from uniform(0, 1);
    * non-numeric columns: each NaN or empty-string cell is replaced by a
      randomly chosen observed value (empty strings are not candidates);
      columns without any candidate are left untouched.

    Parameters
    ----------
    data_dict : dict[str, pandas.DataFrame]
        Sheets keyed by name. The input frames are not modified.
    seed : int or None, optional
        Seed for the random generator. ``None`` (the default) keeps the
        draws non-reproducible, as before.

    Returns
    -------
    dict[str, pandas.DataFrame]
        A new dict with an imputed copy of each sheet.
    """
    rng = np.random.default_rng(seed)

    def _is_blank(value):
        # A cell counts as missing when it is NaN/None or an empty string.
        return pd.isna(value) or value == ''

    imputed_data = {}

    for sheet_name, df in data_dict.items():
        df_imputed = df.copy()

        # Work by position so duplicated column labels cannot confuse the lookup.
        for col_pos in range(df.shape[1]):
            column_values = df.iloc[:, col_pos]
            body = column_values.iloc[1:]  # everything except the first row

            if column_values.dtype.kind in 'biufc':  # numeric column
                observed = body.dropna()
                if len(observed) > 0:
                    min_val = observed.min()
                    max_val = observed.max()
                    df_imputed.iloc[1:, col_pos] = body.apply(
                        lambda x: rng.uniform(min_val, max_val) if _is_blank(x) else x
                    )
                else:
                    # Nothing observed: no limits to respect, fall back to [0, 1).
                    df_imputed.iloc[1:, col_pos] = rng.uniform(0, 1, size=len(body))
            else:
                # An empty string is itself "missing", so it must not be
                # offered as a replacement value.
                candidates = [
                    value for value in body.dropna().unique()
                    if not (isinstance(value, str) and value == '')
                ]
                if len(candidates) > 0:
                    df_imputed.iloc[1:, col_pos] = body.apply(
                        lambda x: candidates[rng.integers(len(candidates))] if _is_blank(x) else x
                    )

        imputed_data[sheet_name] = df_imputed

    return imputed_data

# Impute missing data in each sheet using random values within column limits.
# `data` is the dict of sheets loaded above; the result has the same keys.
data_imputed = random_impute(data)

# Save the imputed data to a new Excel file (one sheet per dict entry,
# row index not written).
output_file_path = './Delta_cell_area_imputed_one_Movie_random.xlsx'
with pd.ExcelWriter(output_file_path) as writer:
    for sheet_name, df in data_imputed.items():
        df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Imputed data saved to {output_file_path}")

# Print the first rows of every imputed sheet for verification.
for sheet_name, df in data_imputed.items():
    print(f"\nSheet: {sheet_name}")
    print(df.head())


Imputed data saved to ./Delta_cell_area_imputed_one_Movie_random.xlsx

Sheet: Mov. 1
   Mov 1      Delta cell area  N/A = Not applied N/A = Not applied.1  \
0  Slice                 Time  Time (20 minutes)              Cell 1   
1      1             23:00:00                  0             -0.1581   
2      2             23:20:00                 20             -1.4228   
3      3             23:40:00                 40             -0.1581   
4      4  1900-01-01 00:00:00                 60             -0.3162   

  Unnamed: 4 Unnamed: 5 Unnamed: 6 Unnamed: 7 Unnamed: 8 Unnamed: 9  ...  \
0     Cell 2     Cell 3     Cell 4     cell 5     Cell 6     Cell 7  ...   
1     0.1581      0.238     0.1581    -0.5533     -0.553    -1.1857  ...   
2    -0.7905        0.0     1.4229    -0.9486      0.079    -0.7905  ...   
3    -0.1581      1.186    -0.7905     1.1067     -0.711     2.3714  ...   
4    -0.2371      0.158    -0.2372     0.7114      0.949       1.66  ...   

  Unnamed: 36 Unnamed: 37

In [14]:
import pandas as pd


# Re-load the imputed workbook; sheet_name=None returns a dict of
# {sheet name: DataFrame}.
file_path = './Delta_cell_area_imputed_one_Movie_random.xlsx'
data = pd.read_excel(file_path, sheet_name=None)

# Function to transpose the data in each sheet
def transpose_data(data_dict):
    """Return a new dict holding the transpose of every sheet.

    Keys and their order are those of ``data_dict``; each value is the
    ``.T`` of the corresponding DataFrame, so rows become columns and the
    original column labels become the row index.
    """
    return {name: frame.T for name, frame in data_dict.items()}

# Transpose the data
data_transposed = transpose_data(data)

# Save the transposed data to a new Excel file.
# index=False drops the transposed row index, i.e. the original column labels
# of each sheet are not written to the output workbook.
output_file_path = './Delta_cell_area_transposed.xlsx'
with pd.ExcelWriter(output_file_path) as writer:
    for sheet_name, df in data_transposed.items():
        df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Transposed data saved to {output_file_path}")

# Print the first rows of every transposed sheet for verification.
for sheet_name, df in data_transposed.items():
    print(f"\nSheet: {sheet_name}")
    print(df.head())


Transposed data saved to ./Delta_cell_area_transposed.xlsx

Sheet: Mov. 1
                                    0         1         2         3   \
Mov 1                            Slice         1         2         3   
Delta cell area                   Time  23:00:00  23:20:00  23:40:00   
N/A = Not applied    Time (20 minutes)         0        20        40   
N/A = Not applied.1             Cell 1   -0.1581   -1.4228   -0.1581   
Unnamed: 4                      Cell 2    0.1581   -0.7905   -0.1581   

                                      4                    5   \
Mov 1                                  4                    5   
Delta cell area      1900-01-01 00:00:00  1900-01-01 00:20:00   
N/A = Not applied                     60                   80   
N/A = Not applied.1              -0.3162              -0.3162   
Unnamed: 4                       -0.2371              -0.6324   

                                      6                    7   \
Mov 1                                

In [15]:
# Uses `data_transposed` from the previous cell (kernel state, not reloaded
# from disk). dict.copy() is shallow: the DataFrames themselves are shared.
data = data_transposed.copy()

# Concatenate the data from all sheets, stacking them vertically.
# ignore_index=True renumbers the rows, so the per-sheet row labels are lost.
concatenated_df = pd.concat(data.values(), ignore_index=True)

# Save the per-sheet frames plus the stacked frame (as sheet 'Concat_Data')
# to a new Excel file.
output_file_path = './Delta_cell_area_with_concatenated.xlsx'
with pd.ExcelWriter(output_file_path) as writer:
    for sheet_name, df in data.items():
        df.to_excel(writer, sheet_name=sheet_name, index=False)
    concatenated_df.to_excel(writer, sheet_name='Concat_Data', index=False)

print(f"Concatenated data saved to {output_file_path}")

# Print the concatenated data for verification.
print("\nConcatenated Data:")
print(concatenated_df.head())

Concatenated data saved to ./Delta_cell_area_with_concatenated.xlsx

Concatenated Data:
                  0         1         2         3                    4   \
0              Slice         1         2         3                    4   
1               Time  23:00:00  23:20:00  23:40:00  1900-01-01 00:00:00   
2  Time (20 minutes)         0        20        40                   60   
3             Cell 1   -0.1581   -1.4228   -0.1581              -0.3162   
4             Cell 2    0.1581   -0.7905   -0.1581              -0.2371   

                    5                    6                    7   \
0                    5                    6                    7   
1  1900-01-01 00:20:00  1900-01-01 00:40:00  1900-01-01 01:00:00   
2                   80                  100                  120   
3              -0.3162               0.6324               0.3953   
4              -0.6324               0.8695               0.7905   

                    8                    9   ...    

In [19]:
# Re-load every sheet of the concatenated workbook as {sheet name: DataFrame}.
file_path = './Delta_cell_area_with_concatenated.xlsx'
data = pd.read_excel(file_path, sheet_name=None)

# Get the concatenated data sheet
concatenated_df1 = data['Concat_Data']

# Delete rows whose first cell is exactly "Slice".
cleaned_df = concatenated_df1[concatenated_df1.iloc[:, 0] != 'Slice']

# Save the cleaned data back to a new Excel file: all other sheets are copied
# unchanged and 'Concat_Data' is replaced by the cleaned frame.
output_file_path = './Delta_cell_area_cleaned.xlsx'
with pd.ExcelWriter(output_file_path) as writer:
    for sheet_name, df in data.items():
        if sheet_name != 'Concat_Data':
            df.to_excel(writer, sheet_name=sheet_name, index=False)
    cleaned_df.to_excel(writer, sheet_name='Concat_Data', index=False)

print(f"Cleaned data saved to {output_file_path}")

# Print the cleaned data for verification.
print("\nCleaned Concatenated Data:")
print(cleaned_df.head())

Cleaned data saved to ./Delta_cell_area_cleaned.xlsx

Cleaned Concatenated Data:
                  0         1         2         3                    4   \
1               Time  23:00:00  23:20:00  23:40:00  1900-01-01 00:00:00   
2  Time (20 minutes)         0        20        40                   60   
3             Cell 1   -0.1581   -1.4228   -0.1581              -0.3162   
4             Cell 2    0.1581   -0.7905   -0.1581              -0.2371   
5             Cell 3     0.238         0     1.186                0.158   

                    5                    6                    7   \
1  1900-01-01 00:20:00  1900-01-01 00:40:00  1900-01-01 01:00:00   
2                   80                  100                  120   
3              -0.3162               0.6324               0.3953   
4              -0.6324               0.8695               0.7905   
5                -1.66               -1.028                0.632   

                    8                    9   ...           

In [20]:
# Load the data from the provided Excel file.
# Note: this reads the unfiltered '..._with_concatenated.xlsx' workbook again,
# not the '..._cleaned.xlsx' file, so the "Slice" rows are removed here too.
file_path = './Delta_cell_area_with_concatenated.xlsx'
data = pd.read_excel(file_path, sheet_name=None)

# Filter the 'Concat_Data' sheet: drop every row whose first cell is one of
# the label values "Slice", "Time" or "Time (20 minutes)".
concatenated_df = data['Concat_Data']
filtered_df = concatenated_df[~concatenated_df.iloc[:, 0].isin(["Slice", "Time", "Time (20 minutes)"])]

# Save the filtered data back to a new Excel file; other sheets are copied
# unchanged and keep their original position in the workbook.
output_file_path = './Delta_cell_area_filtered.xlsx'
with pd.ExcelWriter(output_file_path) as writer:
    for sheet_name, df in data.items():
        if sheet_name == 'Concat_Data':
            filtered_df.to_excel(writer, sheet_name=sheet_name, index=False)
        else:
            df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Filtered data saved to {output_file_path}")

# Print the filtered data for verification.
print("\nFiltered Data:")
print(filtered_df.head())

Filtered data saved to ./Delta_cell_area_filtered.xlsx

Filtered Data:
       0       1       2       3       4       5       6       7       8   \
3  Cell 1 -0.1581 -1.4228 -0.1581 -0.3162 -0.3162  0.6324  0.3953 -0.4743   
4  Cell 2  0.1581 -0.7905 -0.1581 -0.2371 -0.6324  0.8695  0.7905  0.5533   
5  Cell 3   0.238       0   1.186   0.158   -1.66  -1.028   0.632   -1.66   
6  Cell 4  0.1581  1.4229 -0.7905 -0.2372  0.9486 -1.0276   -1.66  0.7905   
7  cell 5 -0.5533 -0.9486  1.1067  0.7114 -0.3162 -1.8971 -1.8971       0   

       9   ... 40   41   42   43   44   45   46   47   48   49  
3 -0.4743  ...  1  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  
4 -0.8695  ...  1  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  
5   1.186  ...  1  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  
6 -1.9762  ...  1  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  
7  0.2371  ...  1  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  

[5 rows x 50 columns]


In [21]:
# Load only the 'Concat_Data' sheet of the filtered workbook; `data` is a
# single DataFrame here (not a dict of sheets).
file_path = './Delta_cell_area_filtered.xlsx'
data = pd.read_excel(file_path, sheet_name='Concat_Data')

# Save the "Concat_Data" sheet to a separate CSV file.
# The column labels are written as a header row; the row index is not.
csv_output_path = './Concat_Data.csv'
data.to_csv(csv_output_path, index=False)

print(f"Concat_Data sheet saved to {csv_output_path}")

Concat_Data sheet saved to ./Concat_Data.csv


In [11]:
# Replace the column labels with S0, S1, ... (one per column).
# This mutates whatever `data` currently is in the kernel; the cell does not
# load it itself.
new_header = ['S' + str(i) for i in range(data.shape[1])]
data.columns = new_header


# Not the last expression of the cell, so this preview is not displayed.
data.head()

# Save the "Concat_Data" sheet to a separate CSV file.
# NOTE(review): this writes to the same path as the export cell above, so the
# header found in './Concat_Data.csv' depends on which cell ran last.
csv_output_path = './Concat_Data.csv'
data.to_csv(csv_output_path, index=False)

print(f"Concat_Data sheet saved to {csv_output_path}")

Concat_Data sheet saved to ./Concat_Data.csv


In [22]:
import pandas as pd
import numpy as np
import random


# Load the data from the provided CSV file.
# The CSV is written with a header row (to_csv(..., index=False)), so the first
# line must be parsed as column labels; with header=None it would be treated as
# a data row and end up in the imputed output.
file_path = './Concat_Data.csv'
data = pd.read_csv(file_path, header=0)

# Function to replace NaN or empty cells with a random value from the same row
def impute_row(row):
    """Return ``row`` as a list with missing cells filled from the same row.

    A cell is missing when it is NaN/None or the single-space string ``' '``.
    Each missing cell is replaced by an independently chosen non-missing
    value of the row. When the row has no usable value at all, nothing can
    be imputed and the row is returned unchanged (as a list) instead of
    raising ``IndexError`` from ``random.choice`` on an empty list.
    """
    def _is_missing(val):
        return pd.isna(val) or val == ' '

    non_missing_values = [val for val in row if not _is_missing(val)]
    if not non_missing_values:
        return list(row)
    return [random.choice(non_missing_values) if _is_missing(val) else val for val in row]

# Apply the function to each row in the DataFrame.
# impute_row returns a plain list; without result_type='expand' apply() yields
# a Series whose elements are lists, and the CSV would contain a single column
# of stringified lists instead of one column per value.
data_imputed = data.apply(impute_row, axis=1, result_type='expand')
# 'expand' numbers the resulting columns 0..n-1; restore the original labels.
data_imputed.columns = data.columns

# Save the imputed data to a new CSV file
csv_output_path = './Concat_Delta_Data_imputed.csv'
data_imputed.to_csv(csv_output_path, index=False)

print(f"Imputed data saved to {csv_output_path}")

Imputed data saved to ./Concat_Delta_Data_imputed.csv


In [23]:
import pandas as pd
import numpy as np


# Load the exported CSV; header=0 uses its first line as the column labels.
file_path = './Concat_Data.csv'
data = pd.read_csv(file_path, header=0)

# Function to impute NaN or empty cells with a random value from the same row, ignoring the first column
def impute_random_row_value(df):
    """Fill missing cells in place with a random value taken from the same row.

    The first column is never read as a candidate and never modified. A cell
    is missing when it is NaN/None or the single-space string ``' '``; such
    cells are excluded from the candidate pool, so a placeholder can never be
    written back as a "filled" value. Rows without any usable value are left
    as they are.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to impute. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.
    """
    def _is_missing(value):
        return pd.isna(value) or value == ' '

    for i in range(df.shape[0]):  # Do not skip the first row
        candidates = [value for value in df.iloc[i, 1:] if not _is_missing(value)]
        if not candidates:
            continue
        for j in range(1, df.shape[1]):  # Skip the first column
            if _is_missing(df.iat[i, j]):
                # Pick by position so mixed-type candidates keep their own type
                # (np.random.choice on a list would coerce them to one dtype).
                df.iat[i, j] = candidates[np.random.randint(len(candidates))]
    return df

# Impute the data (the frame is modified in place and returned).
data = impute_random_row_value(data)

# Round the values to 3 decimal places, ignoring the first column.
# errors='coerce' turns any cell that cannot be parsed as a number into NaN.
data.iloc[:, 1:] = data.iloc[:, 1:].apply(pd.to_numeric, errors='coerce').round(3)

# Save the imputed data to a new CSV file
csv_output_path = './Concat_Data_imputed_rounded.csv'
data.to_csv(csv_output_path, index=False)

print(f"Imputed and rounded data saved to {csv_output_path}")




Imputed and rounded data saved to ./Concat_Data_imputed_rounded.csv


In [27]:
import pandas as pd

# Load the CSV file
file_path = './Concat_Data_imputed_rounded.csv'
df = pd.read_csv(file_path)

# Extract the column at zero-based position 49 (i.e. the 50th column) as the
# Movie number and keep it in a separate one-column DataFrame.
# NOTE(review): the preview printed by the filtering cell shows the value 1 in
# column 40 and NaN in columns 41-49 for the first rows, which suggests the
# Movie marker may not sit at the same position for every sheet -- confirm
# that position 49 really holds the Movie number for all rows.
col_49_df = df.iloc[:, [49]].copy()

# Drop that same column (position 49) from the main frame.
df_without_col_49 = df.drop(df.columns[49], axis=1)

# Save the new DataFrame to a CSV file
output_file_path = './Concat_Data_without_col_Movie.csv'
df_without_col_49.to_csv(output_file_path, index=False)

print(f"The 49th column has been dropped and the new DataFrame has been saved to {output_file_path}")


The 49th column has been dropped and the new DataFrame has been saved to ./Concat_Data_without_col_Movie.csv


In [28]:

# Load the frame that no longer contains the Movie column.
file_path = './Concat_Data_without_col_Movie.csv'
data = pd.read_csv(file_path, header=0)

# Function to rename the first column of each row
def rename_first_column(df):
    """Overwrite the first column with sequential labels and return ``df``.

    Row ``k`` (zero-based) receives the label ``'Cell <k+1>'``, so labels run
    from ``'Cell 1'`` to ``'Cell <number of rows>'``. The frame is modified in
    place; the returned object is the same ``df``.
    """
    row_count = df.shape[0]
    labels = []
    for number in range(1, row_count + 1):
        labels.append(f'Cell {number}')
    df.iloc[:, 0] = labels
    return df

# Replace the first column with sequential 'Cell N' labels (one per row).
data = rename_first_column(data)

# Save the modified data to a new CSV file
csv_output_path = './Concat_Data1_renamed.csv'
data.to_csv(csv_output_path, index=False)

print(f"Renamed data saved to {csv_output_path}")

Renamed data saved to ./Concat_Data1_renamed.csv


In [31]:
import pandas as pd

# Load the renamed data; the first column holds the 'Cell N' labels.
file_path = './Concat_Data1_renamed.csv'
data = pd.read_csv(file_path, header=0)

# Function to engineer new features using descriptive statistics
def engineer_features(df):
    """Compute per-row descriptive statistics of ``df``.

    Every statistic is taken across the columns of a row (``axis=1``). The
    result shares the row index of ``df`` and has these columns, in order:
    Mean, Median, StdDev, Min, Max, 25th Percentile, 75th Percentile and
    Range (Max minus Min).
    """
    row_min = df.min(axis=1)
    row_max = df.max(axis=1)
    return pd.DataFrame({
        'Mean': df.mean(axis=1),
        'Median': df.median(axis=1),
        'StdDev': df.std(axis=1),
        'Min': row_min,
        'Max': row_max,
        '25th Percentile': df.quantile(0.25, axis=1),
        '75th Percentile': df.quantile(0.75, axis=1),
        'Range': row_max - row_min,
    })

# Exclude the first column (the 'Cell N' labels) when calculating statistics
numeric_data = data.iloc[:, 1:]

# Engineer new features: one row of statistics per input row.
features = engineer_features(numeric_data)

# Combine the original data with the new features, side by side; rows are
# matched on the shared row index.
combined_data = pd.concat([data, features], axis=1)

# Save the combined data to a new CSV file
csv_output_path = './Concat_Data_with_features.csv'
combined_data.to_csv(csv_output_path, index=False)

print(f"Data with engineered features saved to {csv_output_path}")


Data with engineered features saved to ./Concat_Data_with_features.csv


In [32]:
# Preview the original columns together with the appended statistics.
combined_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,47,48,Mean,Median,StdDev,Min,Max,25th Percentile,75th Percentile,Range
0,Cell 1,-0.158,-1.423,-0.158,-0.316,-0.316,0.632,0.395,-0.474,-0.474,...,-0.474,0.395,-0.028,-0.1975,0.8098,-1.581,1.502,-0.474,0.45425,3.083
1,Cell 2,0.158,-0.79,-0.158,-0.237,-0.632,0.87,0.79,0.553,-0.87,...,-1.344,0.158,0.044479,0.1185,0.679961,-1.581,1.107,-0.237,0.5925,2.688
2,Cell 3,0.238,0.0,1.186,0.158,-1.66,-1.028,0.632,-1.66,1.186,...,-0.079,-0.474,-0.044437,-0.0395,0.859101,-1.977,2.292,-0.474,0.31675,4.269
3,Cell 4,0.158,1.423,-0.79,-0.237,0.949,-1.028,-1.66,0.79,-1.976,...,-0.79,0.158,0.003333,0.0,0.994783,-2.292,1.739,-0.6715,0.87,4.031
4,Cell 5,-0.553,-0.949,1.107,0.711,-0.316,-1.897,-1.897,0.0,0.237,...,1.107,-0.158,-0.116937,0.0,0.875039,-1.897,1.502,-0.87,0.49375,3.399


In [33]:
import pandas as pd
from statsmodels.tsa.stattools import acf


# Load the renamed data again; the first column holds the 'Cell N' labels.
file_path = './Concat_Data1_renamed.csv'
data = pd.read_csv(file_path, header=0)

# Function to calculate autocorrelation for each row
def calculate_autocorrelation(df, max_lag):
    """Compute the autocorrelation of every row at lags 1..max_lag.

    Each row of ``df`` is treated as one series and passed to ``acf``. The
    function is called once per row with ``nlags=max_lag`` and the values at
    positions 1..max_lag are kept (position 0 is the lag-0 term), instead of
    re-running ``acf`` separately for every lag.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data, one series per row.
    max_lag : int
        Largest lag to report.

    Returns
    -------
    pandas.DataFrame
        Same row index as ``df``; columns ``Autocorr_Lag_1`` ...
        ``Autocorr_Lag_<max_lag>``.
    """
    lag_columns = [f'Autocorr_Lag_{lag}' for lag in range(1, max_lag + 1)]
    # One acf call per row; the slice drops the lag-0 entry.
    per_row = [acf(row, nlags=max_lag)[1:max_lag + 1] for _, row in df.iterrows()]
    return pd.DataFrame(per_row, index=df.index, columns=lag_columns)

# Exclude the first column (the 'Cell N' labels) when calculating autocorrelation
numeric_data = data.iloc[:, 1:]

# Set the maximum lag for autocorrelation
max_lag = 5  # You can adjust this value based on your needs

# Calculate autocorrelation features: one column per lag, one row per input row.
autocorr_features = calculate_autocorrelation(numeric_data, max_lag)

# Combine the original data with the new features, side by side; rows are
# matched on the shared row index.
combined_data1 = pd.concat([data, autocorr_features], axis=1)

# Save the combined data to a new CSV file
csv_output_path = './Concat_Data_with_autocorr_features.csv'
combined_data1.to_csv(csv_output_path, index=False)

print(f"Data with autocorrelation features saved to {csv_output_path}")


Data with autocorrelation features saved to ./Concat_Data_with_autocorr_features.csv


In [34]:
# Preview the original columns together with the appended autocorrelation lags.
combined_data1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,Autocorr_Lag_1,Autocorr_Lag_2,Autocorr_Lag_3,Autocorr_Lag_4,Autocorr_Lag_5
0,Cell 1,-0.158,-1.423,-0.158,-0.316,-0.316,0.632,0.395,-0.474,-0.474,...,-0.395,-0.237,0.711,-0.474,0.395,-0.238164,0.177314,-0.13223,-0.031185,-0.050104
1,Cell 2,0.158,-0.79,-0.158,-0.237,-0.632,0.87,0.79,0.553,-0.87,...,0.79,-0.237,-0.158,-1.344,0.158,-0.158133,-0.01329,-0.166598,-0.128524,-0.144157
2,Cell 3,0.238,0.0,1.186,0.158,-1.66,-1.028,0.632,-1.66,1.186,...,0.0,-0.079,-0.237,-0.079,-0.474,-0.200886,-0.064798,0.011461,-0.147701,-0.232886
3,Cell 4,0.158,1.423,-0.79,-0.237,0.949,-1.028,-1.66,0.79,-1.976,...,1.739,0.948,-0.237,-0.79,0.158,-0.34352,0.091875,-0.100214,-0.127119,-0.066894
4,Cell 5,-0.553,-0.949,1.107,0.711,-0.316,-1.897,-1.897,0.0,0.237,...,-1.186,0.158,0.079,1.107,-0.158,-0.213207,0.069871,-0.149657,-0.173672,0.218807


In [35]:
# Show the Movie-number column that was split off earlier; `col_49_df` must
# already exist in the kernel (it is not loaded in this cell).
print(col_49_df)

     49
0     1
1     1
2     1
3     1
4     1
..   ..
126   3
127   3
128   3
129   3
130   3

[131 rows x 1 columns]


In [37]:
import pandas as pd

# Load the CSV files into DataFrames
df1 = pd.read_csv('./Concat_Data_with_autocorr_features.csv')
df2 = pd.read_csv('./Concat_Data_with_features.csv')

# Inner-join the two frames on the column labelled '0'.
# Columns that exist in both frames under the same label (other than the join
# key) are kept twice, with pandas' default '_x' / '_y' suffixes.
merged_df = pd.merge(df1, df2, how='inner', on='0')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv('./Merged_Concat_Data.csv', index=False)


In [40]:


import pandas as pd

# Load the CSV files
autocorr_df = pd.read_csv('./Concat_Data_with_autocorr_features.csv')
features_df = pd.read_csv('./Concat_Data_with_features.csv')

# Extract the specified columns from both DataFrames
autocorr_columns = autocorr_df[['Autocorr_Lag_1', 'Autocorr_Lag_2', 'Autocorr_Lag_3', 'Autocorr_Lag_4', 'Autocorr_Lag_5']]
features_columns = features_df[['Mean', 'Median', 'StdDev', 'Min', 'Max', '25th Percentile', '75th Percentile', 'Range']]

# Drop the extracted columns from both DataFrames, leaving only the columns
# each file inherited from the data it was built on.
autocorr_df = autocorr_df.drop(columns=['Autocorr_Lag_1', 'Autocorr_Lag_2', 'Autocorr_Lag_3', 'Autocorr_Lag_4', 'Autocorr_Lag_5'])
features_df = features_df.drop(columns=['Mean', 'Median', 'StdDev', 'Min', 'Max', '25th Percentile', '75th Percentile', 'Range'])

# Place the two remaining frames side by side (this rebinds `merged_df`).
# NOTE(review): both files were produced from './Concat_Data1_renamed.csv', so
# this presumably yields two copies of the same original columns -- confirm
# whether the duplication is intended before relying on column positions.
merged_df = pd.concat([autocorr_df, features_df], axis=1)

# Append the extracted columns to the merged DataFrame
final_df = pd.concat([merged_df, autocorr_columns, features_columns], axis=1)

# Save the final DataFrame to a new CSV file
final_df.to_csv('./Merged_Concat_Data_with_all_features.csv', index=False)


In [43]:
# Rename the column in col_49_df to "Movie#" (mutates the frame created when
# the Movie column was split off; it must still be in the kernel).
col_49_df.columns = ['Movie#']

# Rebuild final_df with the Movie# column inserted between merged_df and the
# feature columns. All four frames come from kernel state and are matched on
# their row index.
final_df = pd.concat([merged_df, col_49_df, autocorr_columns, features_columns], axis=1)

# Save the updated DataFrame to a new CSV file
final_df.to_csv('Updated_Merged_Concat_Data_with_col_Movie.csv', index=False)

In [44]:
# Load your CSV file into a DataFrame
df = pd.read_csv('./Updated_Merged_Concat_Data_with_col_Movie.csv')

# Drop the first 49 columns by position, keeping everything from zero-based
# position 49 onwards.
# NOTE(review): the saved file starts with the columns of `autocorr_df`
# followed by those of `features_df`; if both carry the same original columns,
# this removes only the first copy and the second copy stays in the output --
# confirm that 49 matches the intended width.
df_dropped = df.iloc[:, 49:]

# Save the resulting DataFrame to a new CSV file
df_dropped.to_csv('./Updated_Data_merged_Features.csv', index=False)