## Purpose
This notebook creates a dataset to which a machine learning algorithm will be applied. The goal is to predict the price movement of the Dow Jones Industrial Average (DJIA) based on the average prices of wheat, dairy, and beef, along with the food CPI. Price movement of the DJIA will be categorized as positive (increase over the previous month) or negative (decrease over the previous month), with no movement (0%) considered to be positive. 

In [516]:
import pandas as pd

In [517]:
# Load every cleaned source table from its CSV export into its own dataframe.
# The paths are read in the same order as the names on the left-hand side.
djia_df, beef_df, milk_df, wheat_df, CPI_Comp_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "C:/Users/jhillman/OneDrive/Desktop/Data Analytics Bootcamp/Three_Meals/Edited Data/Output/cleaned_djia.csv",
        "C:/Users/jhillman/OneDrive/Desktop/Data Analytics Bootcamp/Three_Meals/Edited Data/Output/FRED_beef_cleaned.csv",
        "C:/Users/jhillman/OneDrive/Desktop/Data Analytics Bootcamp/Three_Meals/Edited Data/Output/cleaned_milk_data.csv",
        "C:/Users/jhillman/OneDrive/Desktop/Data Analytics Bootcamp/Three_Meals/Edited Data/Output/avg_price_wheat_cleaned.csv",
        "C:/Users/jhillman/OneDrive/Desktop/Data Analytics Bootcamp/Three_Meals/Edited Data/Output/CPI_Comp.csv",
    )
)

In [518]:
# Check dataframes

In [519]:
djia_df.head()

Unnamed: 0,date_time,DJIA_Price,DJIA_Open,DJIA_High,DJIA_Low,DJIA_Volume,DJIA_Change_Percent
0,2023-03-01,32930.14,32656.37,32973.59,32500.84,,0.84
1,2023-02-01,32654.98,34039.6,34333.87,32638.35,,-4.2
2,2023-01-01,34086.89,33225.61,34342.28,32812.33,,2.83
3,2022-12-01,33147.28,34533.59,34711.63,32573.43,,-4.16
4,2022-11-01,34587.46,32927.61,34587.46,31728.85,,5.66


In [520]:
# DJIA data sorted in reverse chronological order

In [521]:
beef_df.head()

Unnamed: 0,DATE,Beef $/LB,Beef_Pct_Change (Monthly)
0,1990-01-01,1.557,3.730846
1,1990-02-01,1.572,0.963391
2,1990-03-01,1.571,-0.063613
3,1990-04-01,1.593,1.400382
4,1990-05-01,1.577,-1.004394


In [522]:
milk_df.head()

Unnamed: 0.1,Unnamed: 0,Date,Milk Cost per Gallon,Milk_Pct_Change (Monthly)
0,0,1995-07-01,2.477,
1,1,1995-08-01,2.482,0.201857
2,2,1995-09-01,2.459,-0.926672
3,3,1995-10-01,2.473,0.569337
4,4,1995-11-01,2.493,0.808734


In [523]:
# Milk data starts July 1, 1995 (first row is 1995-07-01) and has an extra index column

In [524]:
wheat_df.head()

Unnamed: 0.1,Unnamed: 0,date_time,Wheat_Price_(USD/LB),Wheat_Pct_Change (Monthly)
0,120,1990-01-01,1.019,
1,121,1990-02-01,1.019,0.0
2,122,1990-03-01,1.019,0.0
3,123,1990-04-01,1.019,0.0
4,124,1990-05-01,1.019,0.0


In [525]:
CPI_Comp_df.head()

Unnamed: 0.1,Unnamed: 0,date_time,Food - Index (1982-1984=100),Food - Pct_Change (Yearly),Food - Pct_Change (Monthly)
0,276,1990-01-01,126.1,6.955047,1.857835
1,277,1990-02-01,127.0,7.263514,0.713719
2,278,1990-03-01,127.4,5.813953,0.314961
3,279,1990-04-01,128.1,5.955335,0.549451
4,280,1990-05-01,128.2,5.427632,0.078064


In [526]:
# Drop all extra index columns

In [527]:
# Remove the extra index column from the milk data, then preview the result.
milk_df = milk_df.drop(columns=["Unnamed: 0"])
milk_df.head()

Unnamed: 0,Date,Milk Cost per Gallon,Milk_Pct_Change (Monthly)
0,1995-07-01,2.477,
1,1995-08-01,2.482,0.201857
2,1995-09-01,2.459,-0.926672
3,1995-10-01,2.473,0.569337
4,1995-11-01,2.493,0.808734


In [528]:
# Same clean-up for the wheat and CPI tables: discard the extra index column.
wheat_df = wheat_df.drop(columns=["Unnamed: 0"])
CPI_Comp_df = CPI_Comp_df.drop(columns=["Unnamed: 0"])

In [529]:
# Keep only the date, closing price and monthly percent change of the DJIA.
# Start from every column name and strike out the ones to keep; whatever is
# left in col_list is what gets dropped.
col_list = list(djia_df.columns)
for kept_name in ("date_time", "DJIA_Change_Percent", "DJIA_Price"):
    col_list.remove(kept_name)

djia_df = djia_df.drop(columns=col_list)
djia_df.head()

Unnamed: 0,date_time,DJIA_Price,DJIA_Change_Percent
0,2023-03-01,32930.14,0.84
1,2023-02-01,32654.98,-4.2
2,2023-01-01,34086.89,2.83
3,2022-12-01,33147.28,-4.16
4,2022-11-01,34587.46,5.66


In [448]:
# Change the names of columns to date_time

In [530]:
# Give every table the same "date_time" key so they can be merged on it.
milk_df.rename(columns={"Date": "date_time"}, inplace=True)
# NOTE(review): the wheat preview already shows a "date_time" column and no
# "DATE" column, so this rename appears to be a silent no-op - confirm.
wheat_df.rename(columns={"DATE": "date_time"}, inplace=True)
beef_df.rename(columns={"DATE": "date_time"}, inplace=True)

In [531]:
milk_df.head()

Unnamed: 0,date_time,Milk Cost per Gallon,Milk_Pct_Change (Monthly)
0,1995-07-01,2.477,
1,1995-08-01,2.482,0.201857
2,1995-09-01,2.459,-0.926672
3,1995-10-01,2.473,0.569337
4,1995-11-01,2.493,0.808734


In [532]:
wheat_df.head()

Unnamed: 0,date_time,Wheat_Price_(USD/LB),Wheat_Pct_Change (Monthly)
0,1990-01-01,1.019,
1,1990-02-01,1.019,0.0
2,1990-03-01,1.019,0.0
3,1990-04-01,1.019,0.0
4,1990-05-01,1.019,0.0


In [533]:
beef_df.head()

Unnamed: 0,date_time,Beef $/LB,Beef_Pct_Change (Monthly)
0,1990-01-01,1.557,3.730846
1,1990-02-01,1.572,0.963391
2,1990-03-01,1.571,-0.063613
3,1990-04-01,1.593,1.400382
4,1990-05-01,1.577,-1.004394


In [534]:
# Sort djia_df in chronological order to match other tables

In [535]:
# Order the DJIA rows oldest-first, like the other tables.
# NOTE(review): the preview shows a 1990-01-02 row next to 1990-01-01 -
# confirm whether it is a stray record in the source file.
djia_df = djia_df.sort_values(by="date_time", ascending=True)
djia_df.head()

Unnamed: 0,date_time,DJIA_Price,DJIA_Change_Percent
458,1990-01-01,2590.54,-5.91
457,1990-01-02,2590.54,0.0
456,1990-02-01,2627.25,1.42
455,1990-03-01,2707.21,3.04
454,1990-04-01,2656.76,-1.86


In [536]:
# Merge data into one dataframe

In [537]:
# Start the combined table: DJIA joined to beef on the shared date key.
# Inner join, so only dates present in both tables are kept.
merged_df = pd.merge(djia_df, beef_df, on="date_time", how="inner")
merged_df.head()

Unnamed: 0,date_time,DJIA_Price,DJIA_Change_Percent,Beef $/LB,Beef_Pct_Change (Monthly)
0,1990-01-01,2590.54,-5.91,1.557,3.730846
1,1990-02-01,2627.25,1.42,1.572,0.963391
2,1990-03-01,2707.21,3.04,1.571,-0.063613
3,1990-04-01,2656.76,-1.86,1.593,1.400382
4,1990-05-01,2876.66,8.28,1.577,-1.004394


In [538]:
# Add the wheat columns to the combined table.
# Inner join on the date key; no renaming is needed in this step.
merged_df = pd.merge(merged_df, wheat_df, on="date_time", how="inner")
merged_df.head()

Unnamed: 0,date_time,DJIA_Price,DJIA_Change_Percent,Beef $/LB,Beef_Pct_Change (Monthly),Wheat_Price_(USD/LB),Wheat_Pct_Change (Monthly)
0,1990-01-01,2590.54,-5.91,1.557,3.730846,1.019,
1,1990-02-01,2627.25,1.42,1.572,0.963391,1.019,0.0
2,1990-03-01,2707.21,3.04,1.571,-0.063613,1.019,0.0
3,1990-04-01,2656.76,-1.86,1.593,1.400382,1.019,0.0
4,1990-05-01,2876.66,8.28,1.577,-1.004394,1.019,0.0


In [539]:
# Add the food CPI columns to the combined table.
# Inner join on the date key; no renaming is needed in this step.
merged_df = pd.merge(merged_df, CPI_Comp_df, on="date_time", how="inner")
merged_df.head()

Unnamed: 0,date_time,DJIA_Price,DJIA_Change_Percent,Beef $/LB,Beef_Pct_Change (Monthly),Wheat_Price_(USD/LB),Wheat_Pct_Change (Monthly),Food - Index (1982-1984=100),Food - Pct_Change (Yearly),Food - Pct_Change (Monthly)
0,1990-01-01,2590.54,-5.91,1.557,3.730846,1.019,,126.1,6.955047,1.857835
1,1990-02-01,2627.25,1.42,1.572,0.963391,1.019,0.0,127.0,7.263514,0.713719
2,1990-03-01,2707.21,3.04,1.571,-0.063613,1.019,0.0,127.4,5.813953,0.314961
3,1990-04-01,2656.76,-1.86,1.593,1.400382,1.019,0.0,128.1,5.955335,0.549451
4,1990-05-01,2876.66,8.28,1.577,-1.004394,1.019,0.0,128.2,5.427632,0.078064


In [540]:
# Add the milk columns to the combined table.
# Inner join on the date key, so the result only covers dates that the milk
# table also contains.
merged_df = pd.merge(merged_df, milk_df, on="date_time", how="inner")
merged_df.head()

Unnamed: 0,date_time,DJIA_Price,DJIA_Change_Percent,Beef $/LB,Beef_Pct_Change (Monthly),Wheat_Price_(USD/LB),Wheat_Pct_Change (Monthly),Food - Index (1982-1984=100),Food - Pct_Change (Yearly),Food - Pct_Change (Monthly),Milk Cost per Gallon,Milk_Pct_Change (Monthly)
0,1995-07-01,4708.47,3.34,1.365,2.4006,1.147,-0.606586,138.2,0.582242,0.290276,2.477,
1,1995-08-01,4610.56,-2.08,1.328,-2.710623,1.161,1.220575,138.8,1.166181,0.434153,2.482,0.201857
2,1995-09-01,4789.08,3.87,1.376,3.614458,1.159,-0.172265,139.5,1.528384,0.504323,2.459,-0.926672
3,1995-10-01,4755.48,-0.7,1.371,-0.363372,1.175,1.3805,140.6,2.852963,0.78853,2.473,0.569337
4,1995-11-01,5074.49,6.71,1.368,-0.218818,1.169,-0.510638,141.0,3.296703,0.284495,2.493,0.808734


In [541]:
# Check out new merged dataframe
# Milk data only goes back to 1995-07-01 (other data went to 1990-01-01)
# See how far forward data goes and make sure prices are matched to dates correctly

In [542]:
djia_df.loc[djia_df["date_time"] == "1995-07-01"]

Unnamed: 0,date_time,DJIA_Price,DJIA_Change_Percent
369,1995-07-01,4708.47,3.34


In [543]:
beef_df.loc[beef_df["date_time"] == "1995-07-01"]

Unnamed: 0,date_time,Beef $/LB,Beef_Pct_Change (Monthly)
66,1995-07-01,1.365,2.4006


In [544]:
wheat_df.loc[wheat_df["date_time"] == "1995-07-01"]

Unnamed: 0,date_time,Wheat_Price_(USD/LB),Wheat_Pct_Change (Monthly)
66,1995-07-01,1.147,-0.606586


In [545]:
CPI_Comp_df.loc[CPI_Comp_df["date_time"] == "1995-07-01"]

Unnamed: 0,date_time,Food - Index (1982-1984=100),Food - Pct_Change (Yearly),Food - Pct_Change (Monthly)
66,1995-07-01,138.2,0.582242,0.290276


In [546]:
milk_df.loc[milk_df["date_time"] == "1995-07-01"]

Unnamed: 0,date_time,Milk Cost per Gallon,Milk_Pct_Change (Monthly)
0,1995-07-01,2.477,


In [547]:
# Price values look good

In [548]:
merged_df.tail(10)

Unnamed: 0,date_time,DJIA_Price,DJIA_Change_Percent,Beef $/LB,Beef_Pct_Change (Monthly),Wheat_Price_(USD/LB),Wheat_Pct_Change (Monthly),Food - Index (1982-1984=100),Food - Pct_Change (Yearly),Food - Pct_Change (Monthly),Milk Cost per Gallon,Milk_Pct_Change (Monthly)
321,2022-04-01,32977.21,-4.91,4.916,3.342443,2.145,1.131542,310.28,14.350577,1.134619,4.012,2.425326
322,2022-05-01,32991.97,0.04,4.794,-2.481692,2.22,3.496503,313.944,14.207137,1.180869,4.204,4.785643
323,2022-06-01,30779.71,-6.71,4.889,1.981644,2.23,0.45045,314.138,11.696291,0.061794,4.153,-1.21313
324,2022-07-01,32846.45,6.71,4.893,0.081816,2.316,3.856502,315.797,10.87677,0.528112,4.156,0.072237
325,2022-08-01,31511.09,-4.07,4.937,0.899244,2.298,-0.777202,317.433,10.640456,0.518054,4.194,0.914341
326,2022-09-01,28730.12,-8.83,4.862,-1.519141,2.362,2.78503,318.374,8.964276,0.296441,4.181,-0.309967
327,2022-10-01,32734.4,13.94,4.836,-0.534759,2.386,1.016088,319.917,8.002714,0.48465,4.184,0.071753
328,2022-11-01,34587.46,5.66,4.853,0.35153,2.419,1.383068,320.034,6.721044,0.036572,4.218,0.81262
329,2022-12-01,33147.28,-4.16,4.8,-1.092108,2.419,0.0,322.507,7.644424,0.77273,4.211,-0.165955
330,2023-01-01,34086.89,2.83,4.791,-0.1875,2.451,1.322861,324.815,8.047648,0.715643,4.204,-0.166231


In [549]:
# Data extends to January 2023

In [550]:
# Sample dataframe to look through
merged_df.sample(20)

Unnamed: 0,date_time,DJIA_Price,DJIA_Change_Percent,Beef $/LB,Beef_Pct_Change (Monthly),Wheat_Price_(USD/LB),Wheat_Pct_Change (Monthly),Food - Index (1982-1984=100),Food - Pct_Change (Yearly),Food - Pct_Change (Monthly),Milk Cost per Gallon,Milk_Pct_Change (Monthly)
17,1996-12-01,6448.26,-1.13,1.424,1.496793,1.3,-0.076864,149.4,5.583039,0.673854,2.727,-0.691916
265,2017-08-01,21948.1,0.26,3.732,-0.586042,2.025,2.948653,246.4,0.486118,0.047506,3.168,-1.584343
296,2020-03-01,21917.16,-13.74,3.881,0.413972,1.994,0.40282,255.786,2.34797,0.09235,3.248,1.627034
291,2019-10-01,27046.23,0.48,3.841,-0.259673,1.948,0.671835,249.966,1.006974,0.400451,3.119,0.548034
257,2016-12-01,19762.6,3.34,3.559,-1.056436,1.96,-3.162055,242.631,-5.198936,-0.482349,3.29,0.304878
195,2011-10-01,11955.01,9.54,2.876,0.27894,2.041,-1.543657,227.017,7.393005,0.258798,3.622,-2.503365
249,2016-04-01,17773.64,0.5,3.815,-3.539823,1.915,0.314301,250.703,-3.209828,-0.320069,3.155,-1.004079
290,2019-09-01,26916.83,1.95,3.851,0.837916,1.935,-0.257732,248.969,0.389511,0.463643,3.102,1.871921
230,2014-09-01,17042.9,-0.32,4.096,2.068278,2.065,-0.337838,259.296,9.294611,0.870228,3.732,1.606316
311,2021-06-01,34502.51,-0.08,4.357,6.24238,2.026,-5.811251,281.243,0.637656,2.311106,3.557,1.715756


In [551]:
# Looking good

In [555]:
# Put both DJIA columns at the right-hand end of the dataframe, after the
# food-price columns.
# The order is built explicitly rather than by popping position 1, so the
# cell gives the same result no matter how many times it is run.
djia_cols = ["DJIA_Price", "DJIA_Change_Percent"]
col_list = [col for col in merged_df.columns if col not in djia_cols] + djia_cols
col_list

['date_time',
 'Beef $/LB',
 'Beef_Pct_Change (Monthly)',
 'Wheat_Price_(USD/LB)',
 'Wheat_Pct_Change (Monthly)',
 'Food - Index (1982-1984=100)',
 'Food - Pct_Change (Yearly)',
 'Food - Pct_Change (Monthly)',
 'Milk Cost per Gallon',
 'Milk_Pct_Change (Monthly)',
 'DJIA_Price',
 'DJIA_Change_Percent']

In [556]:
# Re-select the columns of merged_df in the order held in col_list.
merged_df = merged_df[col_list]

In [557]:
merged_df.head()

Unnamed: 0,date_time,Beef $/LB,Beef_Pct_Change (Monthly),Wheat_Price_(USD/LB),Wheat_Pct_Change (Monthly),Food - Index (1982-1984=100),Food - Pct_Change (Yearly),Food - Pct_Change (Monthly),Milk Cost per Gallon,Milk_Pct_Change (Monthly),DJIA_Price,DJIA_Change_Percent
0,1995-07-01,1.365,2.4006,1.147,-0.606586,138.2,0.582242,0.290276,2.477,,4708.47,3.34
1,1995-08-01,1.328,-2.710623,1.161,1.220575,138.8,1.166181,0.434153,2.482,0.201857,4610.56,-2.08
2,1995-09-01,1.376,3.614458,1.159,-0.172265,139.5,1.528384,0.504323,2.459,-0.926672,4789.08,3.87
3,1995-10-01,1.371,-0.363372,1.175,1.3805,140.6,2.852963,0.78853,2.473,0.569337,4755.48,-0.7
4,1995-11-01,1.368,-0.218818,1.169,-0.510638,141.0,3.296703,0.284495,2.493,0.808734,5074.49,6.71


In [558]:
# Add categorical column reflecting if the DJIA went up or down
#
# "0" for all negative months
# "1" for all positive months (including no change)
#
# Labels are kept as strings. A missing percent change satisfies neither
# `< 0` nor `>= 0`, so it would get no label at all and leave cat_data
# shorter than the dataframe; stop here with a clear message instead.
djia_pct_change = merged_df["DJIA_Change_Percent"]
if djia_pct_change.isna().any():
    raise ValueError("DJIA_Change_Percent contains missing values; cannot label DJIA direction")

cat_data = ["1" if pct >= 0 else "0" for pct in djia_pct_change.values]
cat_data

['1',
 '0',
 '1',
 '0',
 '1',
 '1',
 '1',
 '1',
 '1',
 '0',
 '1',
 '1',
 '0',
 '1',
 '1',
 '1',
 '1',
 '0',
 '1',
 '1',
 '0',
 '1',
 '1',
 '1',
 '1',
 '0',
 '1',
 '0',
 '1',
 '1',
 '0',
 '1',
 '1',
 '1',
 '0',
 '1',
 '0',
 '0',
 '1',
 '1',
 '1',
 '1',
 '1',
 '0',
 '1',
 '1',
 '0',
 '1',
 '0',
 '1',
 '0',
 '1',
 '1',
 '1',
 '0',
 '0',
 '1',
 '0',
 '0',
 '0',
 '1',
 '1',
 '0',
 '1',
 '0',
 '1',
 '1',
 '0',
 '0',
 '1',
 '1',
 '0',
 '1',
 '0',
 '0',
 '1',
 '1',
 '1',
 '0',
 '1',
 '1',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '1',
 '1',
 '0',
 '0',
 '0',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '0',
 '1',
 '0',
 '1',
 '1',
 '1',
 '0',
 '0',
 '0',
 '1',
 '0',
 '1',
 '0',
 '0',
 '1',
 '1',
 '0',
 '1',
 '0',
 '0',
 '1',
 '0',
 '1',
 '0',
 '1',
 '0',
 '1',
 '0',
 '1',
 '1',
 '1',
 '1',
 '0',
 '0',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '1',
 '0',
 '1',
 '1',
 '1',
 '0',
 '0',
 '1',
 '1',
 '1',
 '0',
 '0',
 '0',
 '0',
 '0',
 '1',
 '0',
 '0',
 '1',
 '1',
 '0',
 '0',
 '0',
 '0',
 '0',
 '0',
 '1',
 '1',
 '1'

In [559]:
print(len(cat_data))
print(len(merged_df))

331
331


In [560]:
# Attach the up/down labels as a new "DJIA_change" column and preview the result.
# cat_data needs exactly one entry per row of merged_df for this assignment.
merged_df["DJIA_change"] = cat_data
merged_df.head()

Unnamed: 0,date_time,Beef $/LB,Beef_Pct_Change (Monthly),Wheat_Price_(USD/LB),Wheat_Pct_Change (Monthly),Food - Index (1982-1984=100),Food - Pct_Change (Yearly),Food - Pct_Change (Monthly),Milk Cost per Gallon,Milk_Pct_Change (Monthly),DJIA_Price,DJIA_Change_Percent,DJIA_change
0,1995-07-01,1.365,2.4006,1.147,-0.606586,138.2,0.582242,0.290276,2.477,,4708.47,3.34,1
1,1995-08-01,1.328,-2.710623,1.161,1.220575,138.8,1.166181,0.434153,2.482,0.201857,4610.56,-2.08,0
2,1995-09-01,1.376,3.614458,1.159,-0.172265,139.5,1.528384,0.504323,2.459,-0.926672,4789.08,3.87,1
3,1995-10-01,1.371,-0.363372,1.175,1.3805,140.6,2.852963,0.78853,2.473,0.569337,4755.48,-0.7,0
4,1995-11-01,1.368,-0.218818,1.169,-0.510638,141.0,3.296703,0.284495,2.493,0.808734,5074.49,6.71,1


In [561]:
# Check for accuracy of new column

In [569]:
merged_df.sample(20)

Unnamed: 0,date_time,Beef $/LB,Beef_Pct_Change (Monthly),Wheat_Price_(USD/LB),Wheat_Pct_Change (Monthly),Food - Index (1982-1984=100),Food - Pct_Change (Yearly),Food - Pct_Change (Monthly),Milk Cost per Gallon,Milk_Pct_Change (Monthly),DJIA_Price,DJIA_Change_Percent,DJIA_change
174,2010-01-01,2.279,4.254346,1.759,0.11383,201.693,-3.255932,0.26596,3.236,4.219002,10067.33,-3.46,0
285,2019-04-01,3.775,1.342282,1.952,-0.509684,249.022,-1.076935,-0.358518,2.98,1.222826,26592.91,2.56,1
79,2002-02-01,1.7,-1.960784,1.465,-1.346801,161.9,1.441103,-0.184957,2.807,-0.177809,10106.13,1.88,1
13,1996-08-01,1.391,2.884615,1.307,2.913386,145.3,4.682997,0.623269,2.666,0.641752,5616.2,1.58,1
262,2017-05-01,3.559,0.338314,1.974,-1.986097,243.922,-2.174114,0.095203,3.242,-0.521632,21008.65,0.33,1
209,2012-12-01,3.08,-2.992126,1.925,-3.314917,232.919,1.509664,0.333412,3.58,1.244344,13104.14,0.6,1
120,2005-07-01,2.299,-1.668092,1.375,1.626016,184.6,0.544662,-0.485175,3.09,-1.024984,10640.91,3.56,1
199,2012-02-01,2.947,-1.930116,2.044,3.024194,229.648,5.771543,-0.484042,3.52,-1.758303,12952.07,2.53,1
265,2017-08-01,3.732,-0.586042,2.025,2.948653,246.4,0.486118,0.047506,3.168,-1.584343,21948.1,0.26,1
177,2010-04-01,2.364,5.535714,1.821,1.561629,205.426,-0.22294,1.053698,3.14,-1.505646,11008.61,1.4,1


In [563]:
# Looks good

In [575]:
merged_df.isnull().sum()

date_time                       0
Beef $/LB                       0
Beef_Pct_Change (Monthly)       0
Wheat_Price_(USD/LB)            0
Wheat_Pct_Change (Monthly)      0
Food - Index (1982-1984=100)    0
Food - Pct_Change (Yearly)      0
Food - Pct_Change (Monthly)     0
Milk Cost per Gallon            0
Milk_Pct_Change (Monthly)       1
DJIA_Price                      0
DJIA_Change_Percent             0
DJIA_change                     0
dtype: int64

In [576]:
# Discard every row that has a missing value in any column
# (the null count above reports one, in "Milk_Pct_Change (Monthly)").
merged_df = merged_df.dropna(axis=0, how="any")

In [577]:
merged_df.isnull().sum()

date_time                       0
Beef $/LB                       0
Beef_Pct_Change (Monthly)       0
Wheat_Price_(USD/LB)            0
Wheat_Pct_Change (Monthly)      0
Food - Index (1982-1984=100)    0
Food - Pct_Change (Yearly)      0
Food - Pct_Change (Monthly)     0
Milk Cost per Gallon            0
Milk_Pct_Change (Monthly)       0
DJIA_Price                      0
DJIA_Change_Percent             0
DJIA_change                     0
dtype: int64

In [578]:
# Save the finished dataset as a CSV; the dataframe index is not written.
output_path = "C:/Users/jhillman/OneDrive/Desktop/Data Analytics Bootcamp/Three_Meals/Edited Data/Output/Correlation_data.csv"
merged_df.to_csv(path_or_buf=output_path, index=False)