In [90]:
import pandas as pd 
import numpy as np 
import plotly.express as px

In [91]:
ses = pd.read_csv('../Datasets/PROCESSED/ses_skorlari.csv')
ses.head(3)

Unnamed: 0,İLÇE ADI,MAHALLE ADI,SES,SES SKORU
0,ADALAR,BURGAZADA,B,62.5
1,ADALAR,HEYBELİADA,B,62.5
2,ADALAR,KINALIADA,C,37.5


In [92]:
ilk10 = ses.groupby('İLÇE ADI')['SES SKORU'].mean().reset_index().sort_values('SES SKORU', ascending=False).head(5)
ilk10['SES SKORU'] = np.round(ilk10['SES SKORU'], 2)

In [93]:
son15 = ses.groupby('İLÇE ADI')['SES SKORU'].mean().reset_index().sort_values('SES SKORU', ascending=True).head(15)
son15['SES SKORU'] = np.round(son15['SES SKORU'], 2)

In [94]:
fig = px.bar(ilk10, y='İLÇE ADI', x='SES SKORU', color='İLÇE ADI', title="En Yüksek SES Skorları")
fig.update_layout(title_x=0.15)

# set graph width smaller
fig.update_layout(width=600) 
fig.show()

In [95]:
fig = px.bar(son15, x='İLÇE ADI', y='SES SKORU', color='İLÇE ADI', title='İstanbul\'un En Düşük SES Skoruna Sahip İlçeleri')
fig.update_layout(title_x=0.07, height=350)
# no legends
fig.update_layout(showlegend=False)
fig.show()

In [96]:
ses['SES'].unique()

array(['B', 'C', 'B+', 'E', 'D', 'A', 'C+', 'A+'], dtype=object)

In [97]:
ses_kategorisi = ses[ses['SES'].isin(['A+','A', 'B+'])].groupby(['İLÇE ADI','SES']).size().reset_index(name='SES Adeti').sort_values('SES', ascending=True).sort_values('SES Adeti', ascending=False).head(20)

In [98]:
fig = px.bar(ses_kategorisi, x='İLÇE ADI', y='SES Adeti', color='SES', title='İstanbul\'un SES Skorlarına Göre İlçeleri', color_continuous_scale=px.colors.sequential.RdBu)
fig.update_layout(title_x=0.5, width=800)
fig.show()

In [99]:
maaliyet = pd.read_csv('../Datasets/PROCESSED/nufus_maaliyet.csv')
maaliyet.head(3)

Unnamed: 0,ilce,mahalle,nufus_yogunlugu,nufus,yuzolcumu,maliyet
0,ARNAVUTKOY,ANADOLU,21607.0,33177,1.5,201000
1,ARNAVUTKOY,YUNUS EMRE,14919.0,16085,1.1,201000
2,ARNAVUTKOY,BOGAZKOY ATATURK,11965.0,7726,0.65,201000


In [100]:
fig = px.bar(maaliyet.groupby('ilce')['maliyet'].mean().reset_index().sort_values('maliyet', ascending=True).head(20), x='ilce', y='maliyet', color='ilce', title='İstanbul\'un En Düşük Maliyetli İlçeleri')
fig.update_layout(title_x=0.5)
fig.show()

TEK TEK İNCELEMEK YERİNE VERİ SETLERİNİ BİRLEŞTİRİP SKORLAMA YAPIP EN OPTİMAL MAHALLEYİ BULMAYA ÇALIŞACAĞIM.

In [101]:
turkish_cases = {
    'İ':'I',
    'Ç':'C',
    'Ş':'S',
    'Ğ':'G',
    'Ü':'U',
    'Ö':'O',
    'ı':'i',
    'ç':'c',
    'ş':'s',
    'ğ':'g',
    'ü':'u',
    'ö':'o'
}

In [102]:
ses['MAHALLE ADI'] = ses['MAHALLE ADI'].replace(turkish_cases, regex=True)
ses['MAHALLE ADI'] = ses['MAHALLE ADI'].str.replace(' ', '')

In [103]:
ses['İLÇE ADI'] = ses['İLÇE ADI'].replace(turkish_cases, regex=True)
ses['İLÇE ADI'] = ses['İLÇE ADI'].str.replace(' ', '')

In [104]:
ses.loc[ses['İLÇE ADI']=='EYUP', 'İLÇE ADI'] = 'EYUPSULTAN'

In [105]:
maaliyet['mahalle'] = maaliyet['mahalle'].replace(turkish_cases, regex=True)
maaliyet['mahalle'] = maaliyet['mahalle'].str.replace(' ', '')

In [106]:
database = pd.merge(ses, maaliyet, left_on=['MAHALLE ADI', 'İLÇE ADI'], right_on=['mahalle', 'ilce'], how='inner')
database.head(3)

Unnamed: 0,İLÇE ADI,MAHALLE ADI,SES,SES SKORU,ilce,mahalle,nufus_yogunlugu,nufus,yuzolcumu,maliyet
0,ARNAVUTKOY,ANADOLU,E,12.5,ARNAVUTKOY,ANADOLU,21607.0,33177,1.5,201000
1,ARNAVUTKOY,ARNAVUTKOYMERKEZ,D,25.0,ARNAVUTKOY,ARNAVUTKOYMERKEZ,2771.0,18695,6.7,201000
2,ARNAVUTKOY,BAKLALI,E,12.5,ARNAVUTKOY,BAKLALI,46.9,825,17.6,201000


ağırlıklandırmanın yapılması
* Eşit ağırlıklandırma ile:

In [107]:
agirlik_dict = {
    'Nüfus Yoğunluğu':15,
    'Nüfus':15,
    'SES SKORU':25,
    'Maaliyet':25
}
agirlik_df = pd.DataFrame(agirlik_dict.items(), columns=['Kriter', 'Ağırlık'])
agirlik_df.to_csv('../Datasets/EXTERNAL/agirliklandirma.csv', index=False)

In [108]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
database["nufus_yogunlugu_agirlik"] = scaler.fit_transform(database[["nufus_yogunlugu"]])

scaler = MinMaxScaler()
database["nufus_agirlik"] = scaler.fit_transform(database[["nufus"]])

scaler = MinMaxScaler()
database["maliyet_agirlik"] = 1 - scaler.fit_transform(database[["maliyet"]])

scaler = MinMaxScaler()
database["ses_agirlik"] = scaler.fit_transform(database[["SES SKORU"]])

database["agirlikli_puan"] = (database["nufus_yogunlugu_agirlik"] * 0.25
                            + database["nufus_agirlik"] * 0.25
                            + database["maliyet_agirlik"] * 0.25
                            + database["ses_agirlik"] * 0.25)

In [109]:
database.head()

Unnamed: 0,İLÇE ADI,MAHALLE ADI,SES,SES SKORU,ilce,mahalle,nufus_yogunlugu,nufus,yuzolcumu,maliyet,nufus_yogunlugu_agirlik,nufus_agirlik,maliyet_agirlik,ses_agirlik,agirlikli_puan
0,ARNAVUTKOY,ANADOLU,E,12.5,ARNAVUTKOY,ANADOLU,21607.0,33177,1.5,201000,0.223691,0.355769,0.976303,0.0,0.388941
1,ARNAVUTKOY,ARNAVUTKOYMERKEZ,D,25.0,ARNAVUTKOY,ARNAVUTKOYMERKEZ,2771.0,18695,6.7,201000,0.028643,0.200408,0.976303,0.142857,0.337053
2,ARNAVUTKOY,BAKLALI,E,12.5,ARNAVUTKOY,BAKLALI,46.9,825,17.6,201000,0.000435,0.0087,0.976303,0.0,0.24636
3,ARNAVUTKOY,BALABAN,E,12.5,ARNAVUTKOY,BALABAN,13.8,413,29.8,201000,9.2e-05,0.00428,0.976303,0.0,0.245169
4,ARNAVUTKOY,BOGAZKOYISTIKLAL,D,25.0,ARNAVUTKOY,BOGAZKOYISTIKLAL,337.0,9925,29.4,201000,0.003439,0.106324,0.976303,0.142857,0.307231


In [110]:
en_iyi_esit = database[['İLÇE ADI', 'MAHALLE ADI', 'SES SKORU', 'nufus', 'nufus_yogunlugu', 'maliyet', 'agirlikli_puan']]
en_iyi_esit = en_iyi_esit.sort_values('agirlikli_puan', ascending=False)

In [111]:
fig = px.bar(en_iyi_esit.head(10), y='MAHALLE ADI', x='agirlikli_puan', color='İLÇE ADI')
fig.update_layout(title_x=0.5, height=600, width = 800)
fig.show()

In [112]:
x = en_iyi_esit.groupby('İLÇE ADI')['agirlikli_puan'].sum().reset_index().sort_values('agirlikli_puan', ascending=False).head(10)
x['agirlikli_puan'] = np.round(x['agirlikli_puan'], 2)
x.to_csv('../Datasets/EXTERNAL/en_iyi_esit2.csv', index=False)

In [113]:
database['agirlikli_puan'].describe()

count    900.000000
mean       0.341465
std        0.106616
min        0.000929
25%        0.253388
50%        0.342297
75%        0.418869
max        0.723707
Name: agirlikli_puan, dtype: float64

Hepsine eşit ağırlık verdiğimiz ağırlıklandırma sonucunda mantıklı sonuçlar elde edemedik. Bunun sebebinin SES Segmenti filtrelemesi yapmadan ağırlıklandırma yapmaya girişmek olduğunu düşünüyorum.

Bir sonraki adımım şu olacak; Sosyal yardımın en fazla olduğu yerleri inceleyeceğim ve oradaki SES Segmentinin sınırını ve o SES Segmentinin altında kalan yerleri atıp daha yukarıdaki SES Segmentindeki yerleri araştıracağım ve ağırlıklandırılmış puanı ona göre hesaplayacağım.

In [114]:
yardım = pd.read_csv('../Datasets/PROCESSED/sosyal_yardim.csv')
yardım.head()

Unnamed: 0,İLÇE,MAHALLE,HANE SAYISI
0,ADALAR,BURGAZADA,5
1,ADALAR,HEYBELIADA,22
2,ADALAR,KINALIADA,13
3,ADALAR,MADEN,27
4,ADALAR,NIZAM,20


In [115]:
yardım['MAHALLE'] = yardım['MAHALLE'].str.replace(' ', '')

In [116]:
yardım_grouped = yardım.groupby(['İLÇE','MAHALLE'])['HANE SAYISI'].sum().reset_index().sort_values('HANE SAYISI', ascending=False)

In [117]:
yardım_grouped['HANE SAYISI'].describe()

count     946.000000
mean      244.560254
std       344.892762
min         1.000000
25%        13.000000
50%       105.000000
75%       346.750000
max      2987.000000
Name: HANE SAYISI, dtype: float64

In [118]:
fig = px.box(yardım_grouped, x='HANE SAYISI', title='İstanbul\'un İlçelerine Göre Sosyal Yardım Hane Sayıları')
fig.show()

In [119]:
will_drop = yardım_grouped[yardım_grouped['HANE SAYISI'] > 13]

In [120]:
will_drop.drop(['HANE SAYISI'], axis=1, inplace=True)
will_drop



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,İLÇE,MAHALLE
588,KUCUKCEKMECE,KANARYA
132,BASAKSEHIR,GUVERCINTEPE
71,AVCILAR,YESILKENT
111,BAHCELIEVLER,ZAFER
835,SULTANGAZI,ESENTEPE
...,...,...
149,BESIKTAS,AKAT
197,BEYKOZ,MAHMUTSEVKETPASA
857,TUZLA,ORHANLI
454,FATIH,HACIKADIN


In [121]:
will_drop['will drop'] = 1



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [122]:
will_drop.head()

Unnamed: 0,İLÇE,MAHALLE,will drop
588,KUCUKCEKMECE,KANARYA,1
132,BASAKSEHIR,GUVERCINTEPE,1
71,AVCILAR,YESILKENT,1
111,BAHCELIEVLER,ZAFER,1
835,SULTANGAZI,ESENTEPE,1


In [123]:
database = pd.merge(database, will_drop, left_on=['MAHALLE ADI', 'İLÇE ADI'], right_on=['MAHALLE', 'İLÇE'], how='left')
database.head(3)

Unnamed: 0,İLÇE ADI,MAHALLE ADI,SES,SES SKORU,ilce,mahalle,nufus_yogunlugu,nufus,yuzolcumu,maliyet,nufus_yogunlugu_agirlik,nufus_agirlik,maliyet_agirlik,ses_agirlik,agirlikli_puan,İLÇE,MAHALLE,will drop
0,ARNAVUTKOY,ANADOLU,E,12.5,ARNAVUTKOY,ANADOLU,21607.0,33177,1.5,201000,0.223691,0.355769,0.976303,0.0,0.388941,ARNAVUTKOY,ANADOLU,1.0
1,ARNAVUTKOY,ARNAVUTKOYMERKEZ,D,25.0,ARNAVUTKOY,ARNAVUTKOYMERKEZ,2771.0,18695,6.7,201000,0.028643,0.200408,0.976303,0.142857,0.337053,ARNAVUTKOY,ARNAVUTKOYMERKEZ,1.0
2,ARNAVUTKOY,BAKLALI,E,12.5,ARNAVUTKOY,BAKLALI,46.9,825,17.6,201000,0.000435,0.0087,0.976303,0.0,0.24636,,,


In [124]:
database['will drop'].fillna(0, inplace=True)

In [125]:
# drop
database = database[database['will drop'] == 0]

In [126]:
database.head(3)

Unnamed: 0,İLÇE ADI,MAHALLE ADI,SES,SES SKORU,ilce,mahalle,nufus_yogunlugu,nufus,yuzolcumu,maliyet,nufus_yogunlugu_agirlik,nufus_agirlik,maliyet_agirlik,ses_agirlik,agirlikli_puan,İLÇE,MAHALLE,will drop
2,ARNAVUTKOY,BAKLALI,E,12.5,ARNAVUTKOY,BAKLALI,46.9,825,17.6,201000,0.000435,0.0087,0.976303,0.0,0.24636,,,0.0
3,ARNAVUTKOY,BALABAN,E,12.5,ARNAVUTKOY,BALABAN,13.8,413,29.8,201000,9.2e-05,0.00428,0.976303,0.0,0.245169,,,0.0
5,ARNAVUTKOY,BOYALIK,E,12.5,ARNAVUTKOY,BOYALIK,24.8,674,27.1,201000,0.000206,0.00708,0.976303,0.0,0.245897,,,0.0


In [127]:
database['agirlikli_puan'].min(), database['agirlikli_puan'].max()

(0.0009290699598289741, 0.4638067894294825)

* Kişiselleştirilmiş Ağırlıklandırmma:

In [82]:
agirlik_dict = {
    'Nüfus Yoğunluğu':10,
    'Nüfus':15,
    'Maaliyet':30,
    'SES SKORU':45
}
agirlik_df = pd.DataFrame(agirlik_dict.items(), columns=['Kriter', 'Ağırlık'])
agirlik_df.to_csv('../Datasets/EXTERNAL/agirliklandirma.csv', index=False)

In [83]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
database["nufus_yogunlugu_agirlik"] = scaler.fit_transform(database[["nufus_yogunlugu"]])

scaler = MinMaxScaler()
database["nufus_agirlik"] = scaler.fit_transform(database[["nufus"]])

scaler = MinMaxScaler()
database["maliyet_agirlik"] = 1 - scaler.fit_transform(database[["maliyet"]])

scaler = MinMaxScaler()
database["ses_agirlik"] = scaler.fit_transform(database[["SES SKORU"]])

database["agirlikli_puan"] = (database["nufus_yogunlugu_agirlik"] * 0.20
                            + database["nufus_agirlik"] * 0.10
                            + database["maliyet_agirlik"] * 0.3
                            + database["ses_agirlik"] * 0.40)

database = database.sort_values('agirlikli_puan', ascending=False)

In [84]:
database = database[['İLÇE ADI', 'MAHALLE ADI', 'SES SKORU', 'nufus', 'nufus_yogunlugu', 'maliyet', 'agirlikli_puan']]

In [85]:
database.head(15)

Unnamed: 0,İLÇE ADI,MAHALLE ADI,SES SKORU,nufus,nufus_yogunlugu,maliyet,agirlikli_puan
482,KADIKOY,CAFERAGA,100.0,23977,19324.0,567000,0.658805
85,BAKIRKOY,ATAKOY7-8-9-10.KISIM,100.0,24444,11918.0,525000,0.656336
802,SISLI,TESVIKIYE,100.0,11522,25431.0,543000,0.649799
139,BEYKOZ,ACARLAR,100.0,7319,1346.0,363000,0.643938
497,KADIKOY,SUADIYE,100.0,22484,15157.0,567000,0.640568
485,KADIKOY,ERENKOY,87.5,31098,23013.0,567000,0.636464
28,ATASEHIR,ATATURK,87.5,30063,11843.0,486000,0.635512
83,BAKIRKOY,ATAKOY2-5-6.KISIM,100.0,13671,11131.0,525000,0.619141
487,KADIKOY,FENERYOLU,87.5,24488,24161.0,567000,0.6189
857,USKUDAR,ALTUNIZADE,87.5,13886,7833.0,393000,0.614617


In [86]:
# 3 özellikli scatter
fig = px.scatter(database, x='agirlikli_puan', y='nufus', color='İLÇE ADI', size='SES SKORU')
fig.update_layout(legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
                  width=900, height=600)
fig.show()

In [87]:
fig = px.bar(database.head(10), y='MAHALLE ADI', x='agirlikli_puan', color='İLÇE ADI')
fig.update_layout(title_x=0.5, height=600, width = 700)
fig.show()

In [88]:
database.to_csv("../Datasets/PROCESSED/database.csv", index=False)

In [89]:
ses.to_csv("../Datasets/PROCESSED/ses2.csv", index=False)