In [8]:
import pandas as pd

# Step 1: Configure the input and output files
# The input is a "wide" CSV read without a header: row 0 holds one label per
# airline column, and every following row holds one attribute (metadata such as
# 'Status', or a quarter such as "2020'Q1") whose name sits in column 0.
input_file = 'Revenue_Growth.csv'
output_file = 'Revenue_Growth_ideal_format.csv'


def wide_to_long(raw, value_name, metadata_cols=('Status', 'Country', 'Region', 'IATA')):
    """Reshape a header-less wide table into one row per (airline, quarter).

    Args:
        raw: DataFrame as returned by ``pd.read_csv(path, header=None)``.
            Row 0 supplies the airline labels (its first cell is ignored);
            column 0 of the remaining rows supplies the attribute names.
        value_name: Name given to the numeric column in the result.
        metadata_cols: Attribute names to carry along as extra columns.
            Names that are not present in ``raw`` are skipped.

    Returns:
        A new DataFrame with columns ``['Airline', 'Date', value_name]``
        followed by the metadata columns that were found. Rows whose value is
        not numeric after cleaning, or is exactly zero, are dropped. ``raw``
        is not modified.
    """
    # Step 2: promote the first row to column labels and drop it from the data.
    labels = ['Attribute'] + raw.iloc[0, 1:].tolist()
    body = raw.iloc[1:].copy()
    body.columns = labels

    # Step 3: transpose so that each airline becomes a row and each attribute
    # becomes a column. The old column labels land in a column called 'index'.
    per_airline = (
        body.set_index('Attribute')
        .T
        .reset_index()
        .rename(columns={'index': 'Airline'})
    )

    # Step 4: quarter columns are recognised by the "'Q" marker. A blank
    # attribute label is read as NaN (not a string), so it must be skipped
    # explicitly -- `"'Q" in nan` would raise TypeError.
    date_columns = [
        col for col in per_airline.columns
        if isinstance(col, str) and "'Q" in col
    ]

    # Carry the metadata through the melt itself. Melting on 'Airline' alone
    # and joining the metadata back afterwards multiplies rows whenever two
    # columns share an airline label but differ in metadata.
    id_cols = ['Airline'] + [
        col for col in metadata_cols
        if col != 'Airline' and col in per_airline.columns
    ]
    long_df = per_airline.melt(
        id_vars=id_cols,
        value_vars=date_columns,
        var_name='Date',
        value_name=value_name,
    )

    # Step 5: strip percent signs, dollar signs and thousands separators, then
    # convert to numbers; anything still unparseable becomes NaN.
    cleaned = long_df[value_name].replace(r'[$%,]', '', regex=True)
    long_df[value_name] = pd.to_numeric(cleaned, errors='coerce')

    # Drop rows without a usable value.
    # NOTE(review): exact zeros are discarded as well, as before -- confirm a
    # genuine 0 is never a meaningful data point for this metric.
    long_df = long_df.dropna(subset=[value_name])
    long_df = long_df[long_df[value_name] != 0]

    # Keep the column order the downstream CSV has always had.
    ordered = ['Airline', 'Date', value_name] + id_cols[1:]
    return long_df[ordered].reset_index(drop=True)


# `__name__` is '__main__' both inside a notebook kernel and when the cell is
# run as a script, so this always executes there; the guard only keeps the
# helper importable without touching the filesystem.
if __name__ == '__main__':
    # Step 6: Read the wide CSV, reshape it and save the long-format result
    final_df = wide_to_long(pd.read_csv(input_file, header=None), 'Revenue')
    final_df.to_csv(output_file, index=False)



In [5]:
import pandas as pd

# Step 1: Configure the input and output files
# The input is a "wide" CSV read without a header: row 0 holds one label per
# airline column, and every following row holds one attribute (metadata such as
# 'Status', or a quarter such as "2020'Q1") whose name sits in column 0.
input_file = 'EBITDA.csv'
output_file = 'EBITDA_ideal_format.csv'


# The helper is defined in this cell so the cell runs on a fresh kernel
# without depending on any other cell having been executed first.
def wide_to_long(raw, value_name, metadata_cols=('Status', 'Country', 'Region', 'IATA')):
    """Reshape a header-less wide table into one row per (airline, quarter).

    Args:
        raw: DataFrame as returned by ``pd.read_csv(path, header=None)``.
            Row 0 supplies the airline labels (its first cell is ignored);
            column 0 of the remaining rows supplies the attribute names.
        value_name: Name given to the numeric column in the result.
        metadata_cols: Attribute names to carry along as extra columns.
            Names that are not present in ``raw`` are skipped.

    Returns:
        A new DataFrame with columns ``['Airline', 'Date', value_name]``
        followed by the metadata columns that were found. Rows whose value is
        not numeric after cleaning, or is exactly zero, are dropped. ``raw``
        is not modified.
    """
    # Step 2: promote the first row to column labels and drop it from the data.
    labels = ['Attribute'] + raw.iloc[0, 1:].tolist()
    body = raw.iloc[1:].copy()
    body.columns = labels

    # Step 3: transpose so that each airline becomes a row and each attribute
    # becomes a column. The old column labels land in a column called 'index'.
    per_airline = (
        body.set_index('Attribute')
        .T
        .reset_index()
        .rename(columns={'index': 'Airline'})
    )

    # Step 4: quarter columns are recognised by the "'Q" marker. A blank
    # attribute label is read as NaN (not a string), so it must be skipped
    # explicitly -- `"'Q" in nan` would raise TypeError.
    date_columns = [
        col for col in per_airline.columns
        if isinstance(col, str) and "'Q" in col
    ]

    # Carry the metadata through the melt itself. Melting on 'Airline' alone
    # and joining the metadata back afterwards multiplies rows whenever two
    # columns share an airline label but differ in metadata.
    id_cols = ['Airline'] + [
        col for col in metadata_cols
        if col != 'Airline' and col in per_airline.columns
    ]
    long_df = per_airline.melt(
        id_vars=id_cols,
        value_vars=date_columns,
        var_name='Date',
        value_name=value_name,
    )

    # Step 5: strip dollar signs, percent signs and thousands separators, then
    # convert to numbers; anything still unparseable becomes NaN.
    cleaned = long_df[value_name].replace(r'[$%,]', '', regex=True)
    long_df[value_name] = pd.to_numeric(cleaned, errors='coerce')

    # Drop rows without a usable value.
    # NOTE(review): exact zeros are discarded as well, as before -- confirm a
    # genuine 0 is never a meaningful data point for this metric.
    long_df = long_df.dropna(subset=[value_name])
    long_df = long_df[long_df[value_name] != 0]

    # Keep the column order the downstream CSV has always had.
    ordered = ['Airline', 'Date', value_name] + id_cols[1:]
    return long_df[ordered].reset_index(drop=True)


# `__name__` is '__main__' both inside a notebook kernel and when the cell is
# run as a script, so this always executes there; the guard only keeps the
# helper importable without touching the filesystem.
if __name__ == '__main__':
    # Step 6: Read the wide CSV, reshape it and save the long-format result
    final_df = wide_to_long(pd.read_csv(input_file, header=None), 'EBITDA')
    final_df.to_csv(output_file, index=False)



In [9]:
import pandas as pd

# Load the three long-format tables. Per the original notes, each contributes
# one measure column: 'Value', 'Revenue' and 'EBITDA' respectively.
df1 = pd.read_csv('airlines_ideal_format.csv')
df2 = pd.read_csv('Revenue_Growth_ideal_format.csv')
df3 = pd.read_csv('EBITDA_ideal_format.csv')

# Identifier columns used as the join key for both merges below.
join_keys = ['Airline', 'Date', 'Status', 'Country', 'Region', 'IATA']

# First join: df1 with df2. No `how` is given, so pandas performs its default
# inner join -- only key combinations present on both sides are kept.
merged_df1 = df1.merge(df2, on=join_keys)

# Second join: bring in df3 on the same key columns.
final_merged_df = merged_df1.merge(df3, on=join_keys)

# Persist the combined table without the positional index.
final_merged_df.to_csv('Bubble.csv', index=False)

# Print the first rows as a quick sanity check.
print(final_merged_df.head())


KeyboardInterrupt: 