In [116]:
pip install numpy


[notice] A new release of pip available: 22.1.2 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.


In [None]:
import csv

input_file = 'waste.csv'
output_file = 'waste.csv'

# Read the input CSV file
with open(input_file, 'r') as file:
    lines = file.readlines()

# Replace tab delimiter with comma
lines = [line.replace('\t', ',') for line in lines]

# Write the updated lines to the output CSV file
with open(output_file, 'w') as file:
    file.writelines(lines)

In [138]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np

df = pd.read_csv('waste.csv', usecols=["country", "year", "material","stream", "value"], encoding = "ISO-8859-1", on_bad_lines="skip")
# print(df)
# Read the data from your CSV file or any other data source
# Assuming you have a DataFrame named 'df' with columns: 'country', 'year', 'stream', 'material', 'waste_generated'
filtered_data = df[(df['country'] == "Australia") & (df['stream'] == "Waste generated")]
# Filter the data for a particular country (e.g., 'Country_Name') for which you want to make predictions

unique_variables = filtered_data['material'].unique()

for each in unique_variables: 
    temp_data = filtered_data[filtered_data["material"] == each]
    # Prepare the feature set (X) and target variable (y)
    X = temp_data[['year']].values
    y = temp_data['value'].values
    
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Initialize the Linear Regression model
    model = LinearRegression()
    
    # Train the model on the training data
    model.fit(X_train, y_train)
    
    # Make predictions on the test data
    y_pred = model.predict(X_test)
    
    # Calculate the Root Mean Squared Error (RMSE) as an evaluation metric
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    # print(f"Root Mean Squared Error: {rmse}")
    
    # Now, you can use the trained model to make predictions for future years
    future_years = [2025, 2030, 2035]  # Replace with the years for which you want predictions
    future_years = np.array(future_years).reshape(-1, 1)
    future_predictions = model.predict(future_years)
    
    print(f"\nPredicted waste generation rates for {selected_country} for {each} in future years:")
    
    # print("\nFor 2015", temp_data[(temp_data['year'] == 2015)])
    # print("\nFor 2017",temp_data[(temp_data['year'] == 2017)])
    # print("\nFor 2019",temp_data[(temp_data['year'] == 2019)])
    # print("\nFor 2020",temp_data[(temp_data['year'] == 2020)])
    # print("\nFor 2021",temp_data[(temp_data['year'] == 2021)])
    for year, prediction in zip(future_years.flatten(), future_predictions):
        print(f"\nYear {year}: {prediction}")



Predicted waste generation rates for Australia for Paper, paperboard and paper products in future years:

Year 2025: 7082.95071428566

Year 2030: 7863.112142857106

Year 2035: 8643.273571428552

Predicted waste generation rates for Australia for Glass in future years:

Year 2025: 1281.0255142857059

Year 2030: 1370.5079428571407

Year 2035: 1459.9903714285683

Predicted waste generation rates for Australia for Plastics in future years:

Year 2025: 2493.0886571428564

Year 2030: 2453.7253714285707

Year 2035: 2414.362085714287

Predicted waste generation rates for Australia for All metals in future years:

Year 2025: 5850.796857142865

Year 2030: 6030.0375714285765

Year 2035: 6209.278285714288

Predicted waste generation rates for Australia for Masonry materials in future years:

Year 2025: 33705.78971428564

Year 2030: 42306.16614285717

Year 2035: 50906.54257142823

Predicted waste generation rates for Australia for Organics in future years:

Year 2025: 10426.97362857149

Year 2030:

In [60]:
import pandas as pd
from scipy import interpolate
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import NearestNeighbors
import plotly.graph_objs as go
import dash
import dash_core_components as dcc
import dash_html_components as html

# Read CSV file and create DataFrame
df = pd.read_csv('WaterUse_2.csv', usecols=["country", "year", "pollution_rate","Variable"], encoding = "ISO-8859-1", on_bad_lines="skip")
print(df)

        country                                      Variable  year  \
0     Australia          Public water supply - non-freshwater  2005   
1     Australia          Public water supply - non-freshwater  2010   
2     Australia          Public water supply - non-freshwater  2015   
3     Australia          Public water supply - non-freshwater  2016   
4     Australia          Public water supply - non-freshwater  2017   
...         ...                                           ...   ...   
6727   Colombia  Self- and other supply - Other manufacturing  2016   
6728   Colombia  Self- and other supply - Other manufacturing  2017   
6729   Colombia  Self- and other supply - Other manufacturing  2018   
6730   Colombia  Self- and other supply - Other manufacturing  2019   
6731   Colombia  Self- and other supply - Other manufacturing  2020   

      pollution_rate  
0                NaN  
1                NaN  
2            144.152  
3            148.908  
4            202.822  
...      

In [115]:
missing_rows = df[df['pollution_rate'].isnull()]
pollution_data = df.groupby('country')['pollution_rate'].apply(list).to_dict()

# Perform linear interpolation for each missing row
for index, row in missing_rows.iterrows():
    country = row['country']
    years = df[df['country'] == country]['year'].tolist()
    pollution_rates = pollution_data[country]
    missing_year = row['year']
    
    # Perform linear interpolation
    f = interpolate.interp1d(years, pollution_rates, kind='linear')
    interpolated_rate = f(missing_year)
    
    # Update the missing value with the interpolated rate
    df.at[index, 'pollution_rate'] = interpolated_rate

# For Dash
app = dash.Dash(__name__)
pivot_table2=""

# Create the layout of the app
app.layout = html.Div(children=[
    html.H1("Country Pollution Rates"),
    html.Div(children=[
        html.Label("Select Year"),
        dcc.Dropdown(
            id='year-dropdown',
            options=[{'label': year, 'value': year} for year in df['year'].unique()],
            value=df['year'].min()
            
        ),
        html.Label("Select Variable"),
        dcc.Dropdown(
            id='variable-dropdown'
        ),
        html.Label("Select Country"),
        dcc.Dropdown(
            id='country-dropdown'
        ),
        dcc.Graph(id='country-pollution-graph')
    ])
])

@app.callback(
    dash.dependencies.Output('variable-dropdown', 'options'),
    [dash.dependencies.Input('year-dropdown', 'value')]
    
)
def update_variable_dropdown(selected_year):
    # Filter DataFrame based on selected year
    filtered_df = df[df['year'] == selected_year]

    # Get unique variables for the selected year
    unique_variables = filtered_df['Variable'].unique()

    # Generate dropdown options for variables
    dropdown_options = [{'label': variable, 'value': variable} for variable in unique_variables]

    return dropdown_options

@app.callback(
    dash.dependencies.Output('country-dropdown', 'options'),
    [dash.dependencies.Input('variable-dropdown', 'value')]
   
)
def update_country_dropdown(selected_variable):

    
    filtered_df = df[(df['year'] == selected_year) & (df['Variable'] == selected_variable)]
    print(selected_year)
    # Pivot the data to have countries as columns and pollution rates as rows
    pivot_table = filtered_df.pivot(index='Variable', columns='country', values='pollution_rate')
    pivot_table2 = filtered_df.pivot(index='country', columns='year', values='pollution_rate')
    # print(pivot_table2)
    no_of_wanted_neighbors = 0
   
    if(len(pivot_table2.index) <= 1):
        no_of_wanted_neighbors = 1
    elif(len(pivot_table2.index) == 2):
        no_of_wanted_neighbors = 2
    elif(len(pivot_table2.index) == 3):
        no_of_wanted_neighbors = 3
    else:
        no_of_wanted_neighbors = 4
    neigh = NearestNeighbors(n_neighbors=no_of_wanted_neighbors)
    neigh.fit(pivot_table2.values)
    distances, indices = neigh.kneighbors(pivot_table2.values)
    print(pivot_table2)

    countries = pivot_table2.index
    print(countries)
    
    
    
    # print(countries)
    # Generate dropdown options for countries
    dropdown_options = [{'label': country, 'value': country} for country in countries]

    return dropdown_options

@app.callback(
    dash.dependencies.Output('country-pollution-graph', 'figure'),
    [dash.dependencies.Input('year-dropdown', 'value'),
     dash.dependencies.Input('variable-dropdown', 'value'),
     dash.dependencies.Input('country-dropdown', 'value')]
)
def update_graph(selected_year, selected_variable, selected_country):


    filtered_df = df[(df['year'] == selected_year) & (df['Variable'] == selected_variable)]

    # Pivot the data to have countries as columns and pollution rates as rows
    pivot_table = filtered_df.pivot(index='Variable', columns='country', values='pollution_rate')
    pivot_table2 = filtered_df.pivot(index='country', columns='year', values='pollution_rate')
    no_of_wanted_neighbors = 0
   
    if(len(pivot_table.index) == 1):
        no_of_wanted_neighbors = 1
    elif(len(pivot_table.index) == 2):
        no_of_wanted_neighbors = 2
    elif(len(pivot_table.index) == 3):
        no_of_wanted_neighbors = 3
    else:
        no_of_wanted_neighbors = 4
    neigh = NearestNeighbors(n_neighbors=3)
    neigh.fit(pivot_table2.values)
    distances, indices = neigh.kneighbors(pivot_table2.values)
    print(pivot_table2)
    
    # print(pivot_table.loc[countries["Australia"],selected_year])
    # Plot countries with pollution rates
    data = []

    for country_index, country in enumerate(pivot_table2.index):
    
        if(country == selected_country):
            justlist = pivot_table2.index[indices[country_index]].to_list()
            for countryyy in justlist:
                data.append(go.Bar(x=[countryyy], y=[pivot_table2.loc[countryyy, selected_year]], name=countryyy))
#         justlist.remove(country)
        
#         # similar_countries = ', '.join(justlist)
#         for each in justlist:
#             print(each, "with pollution rate:", pivot_table.loc[each,selected_year])
#         # print(f"{country}: {similar_countries}")
    
    
        

    layout = go.Layout(
        title=f"Countries with pollution rates for {selected_variable} in {selected_year}",
        xaxis={'title': 'Country'},
        yaxis={'title': 'Pollution Rate'}
    )

    return {'data': data, 'layout': layout}


if __name__ == '__main__':
    app.run_server(debug=True)
# For Dash

# print("To see the similar countries first provide the year, for which you want to see the graph")
# unique_years = df['year'].unique()
# for year in unique_years:
    # print(year)
# selected_year = int(input("Enter the year: "))
# selected_year = 2005
# filtered_df = df[df['year'] == selected_year]

# print("To see the similar countries first provide the variable, for which you want to see the graph")
# unique_variables = filtered_df['Variable'].unique()
# for index, value in enumerate(unique_variables):
    # print(f"Index: {index}, Variable: {value}")

# selected_variable = unique_variables[int(input("Enter the Index: "))]
# selected_variable = unique_variables[60]
# print(selected_variable)

# final_filtered_df = filtered_df[filtered_df['Variable'] == selected_variable]

# pivot_table = final_filtered_df.pivot(index='country', columns='year', values='pollution_rate')
# # print(len(pivot_table.index))
# copy_table = pivot_table
# if(len(pivot_table.index) <= 3 ): 
#     print("We only have 3 countries for this criteria so result might not be accurate.")
#     neigh = NearestNeighbors(n_neighbors=2)
    
# else: 
#     neigh = NearestNeighbors(n_neighbors=3)
# neigh.fit(pivot_table.values)
# distances, indices = neigh.kneighbors(pivot_table.values)
# # print(pivot_table)
# countries = pivot_table.index
# # print(pivot_table.loc["Belgium",2005])
# for index, value in enumerate(countries):
#     print(f"Country Number: {index}, Country: {value}")
# selected_country = int(input("Enter the Country Number for which you want the Similar Countries: "))
# print(f"Countries with similar pollution rates to {countries[selected_country]} with pollution rate {pivot_table.loc[countries[selected_country],selected_year]}  in {year} for {selected_variable}:")
# for country_index, country in enumerate(pivot_table.index):
    
#     if(country_index == selected_country):
#         justlist = pivot_table.index[indices[country_index]].to_list()
#         justlist.remove(country)
        
#         # similar_countries = ', '.join(justlist)
#         for each in justlist:
#             print(each, "with pollution rate:", pivot_table.loc[each,selected_year])
#         # print(f"{country}: {similar_countries}")
# # print()




2005
[1;31m---------------------------------------------------------------------------[0m
[1;31mValueError[0m                                Traceback (most recent call last)
File [1;32m~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\base.py:1151[0m, in [0;36m_fit_context.<locals>.decorator.<locals>.wrapper[1;34m(
    estimator=NearestNeighbors(n_neighbors=1),
    *args=(array([], shape=(0, 0), dtype=float64),),
    **kwargs={}
)[0m
[0;32m   1144[0m     estimator[38;5;241m.[39m_validate_params()
[0;32m   1146[0m [38;5;28;01mwith[39;00m config_context(
[0;32m   1147[0m     skip_parameter_validation[38;5;241m=[39m(
[0;32m   1148[0m         prefer_skip_nested_validation [38;5;129;01mor[39;00m global_skip_validation
[0;32m   1149[0m     )
[0;32m   1150[0m ):
[1;32m-> 1151[0m     [38;5;28;01mreturn[39;00m fit_method(estimator, [38;5;241m*[39margs, [38;5;241m*[39m[38;5;241m*[39mkwargs)
        fit_method [1;34m= <function NearestNei