In [None]:
import pandas as pd

# Two small frames with an identical column layout (letter, number)
df1 = pd.DataFrame({'letter': ['a', 'b'], 'number': [1, 2]})
df2 = pd.DataFrame({'letter': ['c', 'd'], 'number': [1, 2]})

# Stack the rows of both frames (concat works along axis=0 by default).
# ignore_index=True throws away the original row labels and numbers the
# result from 0 upwards.
frames_to_stack = [df1, df2]
df_combined = pd.concat(frames_to_stack, ignore_index=True)

# Show the stacked frame, then the index object it ended up with
print("Combined DataFrame:", df_combined, sep="\n")
print("\nIndex Type:", df_combined.index, sep="\n")

In [None]:
import pandas as pd

# Left-hand frame: ids '1'..'5' (stored as strings) with two feature columns
df1_dict = dict(
    id=['1', '2', '3', '4', '5'],
    Feature1=list('ACEGI'),
    Feature2=list('BDFHJ'),
)
df1 = pd.DataFrame(df1_dict)

# Right-hand frame: shares ids '1' and '2' with df1, adds '6', '7', '8'
df2_dict = dict(
    id=['1', '2', '6', '7', '8'],
    Feature1=list('KMOQS'),
    Feature2=list('LNPRT'),
)
df2 = pd.DataFrame(df2_dict)

# Display both inputs, then a separator rule before the merge examples
print("--- DataFrame 1 ---", df1, sep="\n")
print("\n--- DataFrame 2 ---", df2, sep="\n")
print("\n" + "="*50 + "\n")

In [None]:
# Inner join on 'id': a row survives only if its id is present in both
# df1 and df2. Overlapping non-key columns get pandas' default
# '_x' / '_y' suffixes.
df_inner = df1.merge(df2, how='inner', on='id')

print("Output 1: Inner Merge", df_inner, sep="\n")

In [None]:
# Full outer join on 'id': keeps every id found in df1 or df2.
# Overlapping feature columns are told apart with '_df1' / '_df2' suffixes.
# Cells with no counterpart in the other frame come back as NaN; they are
# replaced with the literal string 'nan' purely for display (optional step).
df_outer = (
    df1.merge(df2, how='outer', on='id', suffixes=('_df1', '_df2'))
       .fillna('nan')
)

print("\nOutput 2: Full Outer Merge")
print(df_outer)

In [5]:
import pandas as pd
import numpy as np

# Seed a dedicated generator so the random frames are identical on every
# fresh run of the notebook.
SEED = 42
rng = np.random.default_rng(SEED)

# generate days
# Calendar days stop at 2021-12-15 while business days run to 2021-12-31,
# so the final business days of the year have no alternative data and
# show up as NaN after the left merge below.
all_dates = pd.date_range('2021-01-01', '2021-12-15')
business_dates = pd.bdate_range('2021-01-01', '2021-12-31')

# generate tickers
tickers = ['AAPL', 'FB', 'GE', 'AMZN', 'DAI']

# create indexes: one (Date, Ticker) row per day/ticker combination
index_alt = pd.MultiIndex.from_product([all_dates, tickers], names=['Date', 'Ticker'])
index = pd.MultiIndex.from_product([business_dates, tickers], names=['Date', 'Ticker'])

# create DFs filled with standard-normal noise
market_data = pd.DataFrame(index=index,
                        data=rng.standard_normal((len(index), 3)),
                        columns=['Open','Close','Close_Adjusted'])

alternative_data = pd.DataFrame(index=index_alt,
                                data=rng.standard_normal((len(index_alt), 2)),
                                columns=['Twitter','Reddit'])

# --- Question 1: Merge MultiIndex ---
# Left join on the full (Date, Ticker) index: every market_data row is kept,
# alternative_data columns are attached where the same index pair exists.
merged_df = market_data.merge(alternative_data,
                             how='left',
                             left_index=True,
                             right_index=True)

# --- Question 2: Fill Missing Values ---
filled_df = merged_df.fillna(0)

# Validation check:
# 1) no missing values may remain after the fill;
# 2) filling with 0 must not change the grand total (sum() skips NaN).
#    Compare with a tolerance rather than ==, since these are float sums.
assert not filled_df.isna().any().any(), "fillna(0) left missing values behind"
validation_result = bool(np.isclose(filled_df.sum().sum(), merged_df.sum().sum()))

# Output the required validation results
print(f"DataFrame Shape: {merged_df.shape}")
print("\nmerged_df.head():")
print(merged_df.head())
print(f"\nValidation Check (Q2): {validation_result}")

DataFrame Shape: (1305, 5)

merged_df.head():
                       Open     Close  Close_Adjusted   Twitter    Reddit
Date       Ticker                                                        
2021-01-01 AAPL    0.232440 -0.642224        0.701506  1.255081  0.600231
           FB      0.947082  0.718007        1.208549  0.551082 -0.588527
           GE      0.838357  0.546035        0.936297 -1.639090 -0.465402
           AMZN    0.634875 -0.835024       -0.711350 -0.902523  0.987333
           DAI     1.626691 -0.785365       -0.961057  0.169458  2.036299

Validation Check (Q2): True
