# In [None]:
import pandas as pd
from dash import Input, Output, Dash, html, dcc
import plotly.graph_objects as go
import plotly.express as pt
import numpy as np
import seaborn as sns
import matplotlib.pyplot as mpt

# Load the scraped island listings.
# NOTE(review): the path is machine-specific; adjust it before running elsewhere.
df = pd.read_csv(r'C:\Users\1\Downloads\island_scrape.csv', delimiter=',')

# 'Unnamed: 0' and 'image' are not referenced by any later step.
df.drop(['Unnamed: 0', 'image'], inplace=True, axis=1)

df['scrape_date'] = pd.to_datetime(df['scrape_date'])

# The last ', '-separated component of `region` is stored as the continent.
# The vectorised `.str` accessor propagates a missing region as NaN, whereas
# indexing a float NaN with [-1] in a Python loop raises TypeError.
df['continent'] = df['region'].str.split(', ').str[-1]
df.head()

def meana(x):
    """Return the arithmetic mean of the numeric strings in *x*, or ``pd.NA``.

    Parameters
    ----------
    x : list of str
        The parts of one entry, e.g. ``['10', '20']`` for the range
        ``'10 - 20'`` or ``['1.5']`` for a single value.

    Returns
    -------
    float or pandas.NA
        The mean of the parts when every part parses as a finite,
        non-negative number; ``pd.NA`` when *x* is empty or any part does not.
    """
    if not x:
        return pd.NA
    try:
        values = [float(part) for part in x]
    except (TypeError, ValueError):
        # At least one part is not a number (e.g. free text).
        return pd.NA
    # float() also accepts 'nan' (what str() yields for a missing value) and
    # 'inf'; neither is usable, so both are reported as missing.
    if not all(np.isfinite(v) and v >= 0 for v in values):
        return pd.NA
    return sum(values) / len(values)

# Replace each raw acreage entry with a single value: the entry is rendered as
# text, split on ' - ' (so a range becomes several parts) and handed to `meana`.
df['acreage'] = [meana(str(raw).split(' - ')) for raw in df['acreage']]

# Rows with a missing value in any column are dropped here.
df.dropna(inplace=True)

# Remove commas and dots from the price text. `regex=False` makes '.' a
# literal dot regardless of the installed pandas version's default for `regex`.
df['price'] = df['price'].str.replace(',', '', regex=False)
df['price'] = df['price'].str.replace('.', '', regex=False)

# Keep only the rows whose price is now an all-digit string. `.copy()` yields
# an independent frame, so the assignment below does not write into a slice
# of `df`.
df_with_prices = df[df['price'].str.isdigit()].copy()

# NOTE(review): the 1e8 divisor presumably converts cents to millions of
# dollars (a later plot labels this column 'price (mln $)') - confirm against
# the raw data.
df_with_prices['price'] = df_with_prices['price'].astype(float) / 100000000

df_with_prices.head()

def find_unique(x):
    """Collect the distinct items found inside the list elements of *x*.

    Elements of *x* whose type is not exactly ``list`` are skipped. Items are
    returned in the order in which they are first encountered.
    """
    distinct = []
    # Flatten lazily: only genuine lists contribute items.
    flattened = (item for entry in x if type(entry) is list for item in entry)
    for item in flattened:
        if item in distinct:
            continue
        distinct.append(item)
    return distinct


# One entry per priced listing: the list of its ','-separated tags.
facilities = list(df_with_prices['tags'].str.split(','))
unique_facilities = find_unique(facilities)

# Continue on a frame with a fresh 0..n-1 index; `drop=True` discards the old
# index instead of materialising it as an 'index' column that then has to be
# dropped again.
df_with_pricess = df_with_prices.reset_index(drop=True)

# One boolean indicator column per tag: True where the listing carries the tag.
# Every column is built from the listing's own position in `facilities`, so an
# entry that is not a list yields False for that row only and cannot shift the
# flags of the rows after it.
for tag in unique_facilities:
    df_with_pricess[tag] = [
        isinstance(tags, list) and tag in tags for tags in facilities
    ]

df_with_pricess.head()

from sklearn.metrics import r2_score

from sklearn.tree import DecisionTreeRegressor
from dmba import plotDecisionTree
import pydotplus
from sklearn.model_selection import train_test_split

# Feature matrix: every column except the target ('price') and the listed
# descriptive columns.
X2 = df_with_pricess.drop(['name', 'price', 'tags', 'region', 'scrape_date', 'continent'], axis=1)
y2 = df_with_pricess['price']

x_train2, x_test2, y_train2, y_test2 = train_test_split(X2, y2, test_size=0.2, random_state=0)

# Seed the tree as well as the split so that repeated runs give the same model.
model2 = DecisionTreeRegressor(random_state=0)

model2.fit(x_train2, y_train2)

y_predict_tree = model2.predict(x_train2)

# The score on the rows the tree was fitted on says little about how well it
# generalises, so the held-out 20% is scored too.
r2_train = r2_score(y_train2, y_predict_tree)
r2_test = r2_score(y_test2, model2.predict(x_test2))
print(f'R^2 train: {r2_train:.3f}, R^2 test: {r2_test:.3f}')

df_with_pricess.isnull().sum()

## Predicting the price of the listings in the initial dataset whose price is missing or not a plain number

# Listings whose price is not an all-digit string are the ones the model has
# to price. `.eq(True)` also counts a missing `isdigit` result as non-numeric.
has_numeric_price = df['price'].str.isdigit().eq(True)
# `reset_index(drop=True)` returns a new frame with a 0..n-1 index, so the
# column assignments below do not write into a slice of `df`.
pred_df = df[~has_numeric_price].reset_index(drop=True)

facilities_of_pred = list(pred_df['tags'].str.split(','))

# Build the indicator columns from these listings' own tags
# (`facilities_of_pred`), one column per tag the model was trained on.
# Tags that never occurred among the priced listings are ignored because the
# model has no corresponding feature.
for tag in unique_facilities:
    pred_df[tag] = [
        isinstance(tags, list) and tag in tags for tags in facilities_of_pred
    ]

pred_df.isnull().sum()

final_x_arg = pred_df[['acreage'] + unique_facilities]

# With no listings left to price, skip the model call (scikit-learn estimators
# reject an input with zero rows) and keep an empty result instead.
if len(final_x_arg) > 0:
    final_y_predict_price = model2.predict(final_x_arg)
else:
    final_y_predict_price = np.empty(0)

pred_df['price'] = final_y_predict_price

pred_df.head()

# Stack the listings with known prices on top of the ones priced by the model.
# `pd.concat` replaces `DataFrame.append`, which was removed in pandas 2.0.
final_df = pd.concat([df_with_pricess, pred_df])

# Write the combined table to Excel. Leaving the `with` block saves and closes
# the workbook; `ExcelWriter.save()` was removed in pandas 2.0.
with pd.ExcelWriter('Islands2.xlsx') as datatoexcel:
    final_df.to_excel(datatoexcel)
print('DataFrame is written to Excel File successfully.')

# Price histogram restricted to rows with price <= 60 (axis label: mln $).
fig2 = pt.histogram(final_df[final_df['price']<= 60], x = 'price', nbins = 50, labels = {'price': 'price (mln $)'}, title = 'The distribution of islands prices')
fig2.show()

app = Dash()

# First panel: an initially empty graph (filled by the callback registered on
# 'first') together with the dropdown that selects a tag column.
_tag_panel = html.Div([
    dcc.Graph(id='first'),
    dcc.Dropdown(id='tag', options=unique_facilities, value='Large Acreage'),
])

# Second panel: the static price histogram built earlier.
_price_panel = html.Div([dcc.Graph(id='second', figure=fig2)])

app.layout = html.Div(children=[_tag_panel, _price_panel])

@app.callback(Output('first', 'figure'), Input('tag', 'value'))
def what_to_show(tag):
    """Return the histogram of listings per continent, coloured by *tag*.

    *tag* is the current value of the 'tag' dropdown and is passed to plotly
    as the name of the colour column of ``final_df``.
    """
    return pt.histogram(
        final_df,
        x='continent',
        color=tag,
        labels={'continent': 'Continent'},
        title="The distribution among continents",
    )

if __name__ == '__main__':
    # Newer Dash releases start the server with `app.run` and have retired
    # `app.run_server`; older 2.x releases only provide `run_server`. Use
    # `run` when it exists and fall back otherwise.
    # NOTE(review): port 1000 is below 1024, which usually needs elevated
    # privileges on Unix-like systems - confirm this port is intended.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=False, port=1000)