In [19]:
# Show every column header as a plain Python list, so the per-year columns
# and any stray trailing column are easy to spot.
print(list(df.columns))


['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code', '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024', 'Unnamed: 69']


In [20]:
# Keep only the rows for Germany whose indicator name mentions "debt"
# (case-insensitive substring match).
# NOTE(review): a substring match can select more than one indicator if the
# dataset holds several debt-related series -- confirm only one is expected.
is_germany = df["Country Name"].eq("Germany")
mentions_debt = df["Indicator Name"].str.contains("Debt", case=False)
df_germany = df.loc[is_germany & mentions_debt].copy()


In [21]:
# Reshape from wide (one column per year) to long (one row per year),
# keeping only the 1990-2024 year columns.
year_columns = [f"{year}" for year in range(1990, 2025)]
df_germany_melted = pd.melt(
    df_germany,
    id_vars=["Country Name", "Indicator Name"],
    value_vars=year_columns,
    var_name="Year",
    value_name="DebtPercentGDP",
)


In [22]:
# Clean the data.
# Coerce the values to numeric *before* dropping missing rows: errors="coerce"
# turns every unparseable entry into NaN, so dropping first would let those
# freshly created NaNs slip through into the model's training target.
# The frame is rebuilt rather than mutated in place so the cell can be re-run
# safely.
df_germany_melted = (
    df_germany_melted
    .assign(
        # Year labels come from the column headers, so they are strings here.
        Year=lambda frame: frame["Year"].astype(int),
        DebtPercentGDP=lambda frame: pd.to_numeric(
            frame["DebtPercentGDP"], errors="coerce"
        ),
    )
    .dropna(subset=["DebtPercentGDP"])
)


In [23]:
from sklearn.linear_model import LinearRegression
import numpy as np

# Prepare features and labels: the year is the single feature (as a 2-D column
# vector, which scikit-learn expects) and the debt ratio is the target.
X = df_germany_melted["Year"].values.reshape(-1, 1)
y = df_germany_melted["DebtPercentGDP"].values

# Fit a straight-line trend through the observed values.
model = LinearRegression()
model.fit(X, y)

# Forecast the 5 years that follow the last observed year.
# The start year is taken from the data instead of being hard-coded: the
# training window can itself reach 2024, so a fixed 2024 start could duplicate
# an actual year in the forecast or leave a gap after the last observation.
forecast_horizon = 5
last_observed_year = int(df_germany_melted["Year"].max())
future_years = np.arange(
    last_observed_year + 1,
    last_observed_year + 1 + forecast_horizon,  # end is exclusive
).reshape(-1, 1)
predictions = model.predict(future_years)

# One row per forecast year; the scalar "Germany" is broadcast to every row.
forecast_df = pd.DataFrame({
    "Country": "Germany",
    "Year": future_years.flatten(),
    "PredictedDebtPercentGDP": predictions
})


In [24]:
# Persist the forecast table as CSV, leaving out the row index.
forecast_df.to_csv(path_or_buf="forecast_debt_germany.csv", index=False)


In [25]:
# Build the "Actual" half of the actual-vs-forecast table.
# The country is carried along as "Country" because forecast_df has that
# column; without it, concatenating the two frames leaves Country empty (NaN)
# on every actual row. "Country" is placed last so the combined table keeps the
# column order Year, Value, Type, Country.
actual_df = (
    df_germany_melted[["Year", "DebtPercentGDP", "Country Name"]]
    .rename(columns={"DebtPercentGDP": "Value", "Country Name": "Country"})
    .assign(Type="Actual")
    [["Year", "Value", "Type", "Country"]]
)


In [16]:
# Give the forecast the same "Value" / "Type" layout as the actual data so the
# two tables can be stacked.
forecast_df = (
    forecast_df
    .rename(columns={"PredictedDebtPercentGDP": "Value"})
    .assign(Type="Forecast")
)


In [17]:
# Stack actual and forecast rows into one table with a fresh 0..n-1 index,
# then write it to CSV without the index column.
combined_df = pd.concat(objs=[actual_df, forecast_df], axis=0, ignore_index=True)
combined_df.to_csv(path_or_buf="actual_vs_forecast_debt.csv", index=False)
