In [273]:
import pandas as pd
import numpy as np

Get neighborhood from lat & long

In [274]:
my_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'

In [275]:
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent=my_user_agent)

def get_district_neighborhood(latitude, longitude):
    
    location = geolocator.reverse(f"{latitude}, {longitude}")
    address = location.raw['address']

    # country
    country = address.get('country', '')
    
    # city
    city = address.get('province', '')
    
    
    # district
    district = address.get('town', '')
    
    # neighborhood
    neighborhood = address.get('suburb', '').replace(' Mahallesi', '').upper()
        
    # zipcode
    zipcode = address.get('postcode', '')

    
    df = pd.DataFrame({'country': [country],
                        'city': [city],
                            'district': [district],
                                'neighborhood': [neighborhood],
                                    'zipcode': [zipcode]})
    
    return district, neighborhood

<hr>

In [276]:
turkish_cases = {
    'İ': 'I',
    'ı': 'i',
    'Ğ': 'G',
    'ğ': 'g',
    'Ü': 'U',
    'ü': 'u',
    'Ş': 'S',
    'ş': 's',
    'Ö': 'O',
    'ö': 'o',
    'Ç': 'C',
    'ç': 'c',
}

In [277]:
database = pd.read_csv('../Datasets/PROCESSED/sosyal_yardim.csv')
database.columns = ['ilce', 'mahalle', 'yardim_alan_hane_sayisi']
database.head(3)

Unnamed: 0,ilce,mahalle,yardim_alan_hane_sayisi
0,ADALAR,BURGAZADA,5
1,ADALAR,HEYBELIADA,22
2,ADALAR,KINALIADA,13


In [278]:
ses = pd.read_csv('../Datasets/PROCESSED/ses_skorlari.csv')
ses.columns = ['ilce', 'mahalle','ses_klasmanı', 'ses_skoru']
ses.head(3)

Unnamed: 0,ilce,mahalle,ses_klasmanı,ses_skoru
0,ADALAR,BURGAZADA,B,62.5
1,ADALAR,HEYBELİADA,B,62.5
2,ADALAR,KINALIADA,C,37.5


In [279]:
ses['ilce'] = ses['ilce'].str.upper()
ses['mahalle'] = ses['mahalle'].str.upper()

ses['ilce'] = ses['ilce'].apply(lambda x: ''.join([turkish_cases.get(i, i) for i in x]))
ses['mahalle'] = ses['mahalle'].apply(lambda x: ''.join([turkish_cases.get(i, i) for i in x]))

In [280]:
one_matched = []
multiple_match = []
non_matched = []
for i in ses['ilce'].unique().tolist():
    ct = 0
    for j in database['ilce'].unique():
        if i == j:
            ct += 1
    if ct > 1:
        multiple_match.append(i)
    elif ct == 1:
        one_matched.append(i)
    else:
        non_matched.append(i)

one_matched, multiple_match, non_matched

(['ADALAR',
  'ARNAVUTKOY',
  'ATASEHIR',
  'AVCILAR',
  'BAGCILAR',
  'BAHCELIEVLER',
  'BAKIRKOY',
  'BASAKSEHIR',
  'BAYRAMPASA',
  'BESIKTAS',
  'BEYKOZ',
  'BEYLIKDUZU',
  'BEYOGLU',
  'BUYUKCEKMECE',
  'CATALCA',
  'CEKMEKOY',
  'ESENLER',
  'ESENYURT',
  'FATIH',
  'GAZIOSMANPASA',
  'GUNGOREN',
  'KADIKOY',
  'KAGITHANE',
  'KARTAL',
  'KUCUKCEKMECE',
  'MALTEPE',
  'PENDIK',
  'SANCAKTEPE',
  'SARIYER',
  'SILIVRI',
  'SULTANBEYLI',
  'SULTANGAZI',
  'SILE',
  'SISLI',
  'TUZLA',
  'UMRANIYE',
  'USKUDAR',
  'ZEYTINBURNU'],
 [],
 ['EYUP'])

In [281]:
ses.loc[ses['ilce'] == 'EYUP', 'ilce'] = 'EYUPSULTAN'

In [282]:
database = pd.merge(database, ses, on=['ilce', 'mahalle'], how='left')
database.head(3)

Unnamed: 0,ilce,mahalle,yardim_alan_hane_sayisi,ses_klasmanı,ses_skoru
0,ADALAR,BURGAZADA,5,B,62.5
1,ADALAR,HEYBELIADA,22,B,62.5
2,ADALAR,KINALIADA,13,C,37.5


In [283]:
park = pd.read_csv('../Datasets/PROCESSED/park.csv')
park.dropna(inplace=True)
park.head(3)

Unnamed: 0,TÜR,MAHAL ADI,İLÇE,latitude,longitude
0,PARK,KAYALI BAHÇE PARKI,BAKIRKÖY,40.973001,28.78924
1,PARK,FLORYA SAHİL PARKI,BAKIRKÖY,40.970045,28.791253
2,PARK,FLORYA AKVARYUM ÇEVRESİ,BAKIRKÖY,40.965659,28.798258


In [284]:
park['TÜR'].unique()

array(['PARK', 'METRO ÇIKIŞI', 'KENT ORMANI', 'HATIRA ORMANI', 'KORU',
       'MESİRE YERİ', 'KAVŞAK-REFÜJ', 'KAMU', 'MEYDAN',
       'AĞAÇLANDIRMA SAHASI', 'KÖY PARKLARI'], dtype=object)

In [285]:
parklar = park[park['TÜR'] == 'PARK']
parklar.head(3)

Unnamed: 0,TÜR,MAHAL ADI,İLÇE,latitude,longitude
0,PARK,KAYALI BAHÇE PARKI,BAKIRKÖY,40.973001,28.78924
1,PARK,FLORYA SAHİL PARKI,BAKIRKÖY,40.970045,28.791253
2,PARK,FLORYA AKVARYUM ÇEVRESİ,BAKIRKÖY,40.965659,28.798258


In [286]:
parklar['İLÇE'].unique()

array(['BAKIRKÖY', 'FATİH', 'GÜNGÖREN', 'ZEYTİNBURNU', 'BEYOĞLU',
       'BEŞİKTAŞ', 'KAĞITHANE', 'ŞİŞLİ', 'SARIYER', 'BAŞAKŞEHİR',
       'EYÜPSULTAN', 'ARNAVUTKÖY', 'BAYRAMPAŞA', 'SULTANGAZİ',
       'GAZİOSMANPAŞA', 'ESENLER', 'BÜYÜKÇEKMECE', 'AVCILAR',
       'BEYLİKDÜZÜ', 'ESENYURT', 'BAĞCILAR', 'KÜÇÜKÇEKMECE', 'SİLİVRİ',
       'BEYKOZ', 'PENDİK', 'KARTAL', 'TUZLA', 'MALTEPE', 'KADIKÖY',
       'BÜYÜKADA', 'HEYBELİADA', 'ŞİLE ', 'SANCAKTEPE', 'ŞİLE',
       'SULTANBEYLİ', 'ÜMRANİYE', 'ATAŞEHİR', 'ÜSKÜDAR'], dtype=object)

In [287]:
parklar = parklar[['İLÇE', 'TÜR']].groupby(['İLÇE', 'TÜR']).size().reset_index(name='count')
parklar.head(3)

Unnamed: 0,İLÇE,TÜR,count
0,ARNAVUTKÖY,PARK,1
1,ATAŞEHİR,PARK,4
2,AVCILAR,PARK,4


In [288]:
metrolar = park[park['TÜR'] == 'METRO ÇIKIŞI']
metrolar.head(3)

Unnamed: 0,TÜR,MAHAL ADI,İLÇE,latitude,longitude
185,METRO ÇIKIŞI,NURTEPE ACİL METRO ÇIKIŞI,EYÜPSULTAN,41.079911,28.963314
186,METRO ÇIKIŞI,KAĞITHANE METRO ÇIKIŞI,KAĞITHANE,41.079485,28.974043
187,METRO ÇIKIŞI,DARÜŞAFAKA METRO İSTASYONU ÇIKIŞI(VE ÇEVRESİ),BEŞİKTAŞ,41.129987,29.02501


In [289]:
metrolar['İLÇE'].unique()

array(['EYÜPSULTAN', 'KAĞITHANE', 'BEŞİKTAŞ', 'SARIYER', 'BAĞCILAR',
       'BAŞAKŞEHİR', 'KÜÇÜKÇEKMECE', 'ESENLER', 'GAZİOSMANPAŞA', 'FATİH',
       'ŞİŞLİ', 'ÜSKÜDAR', 'ÜMRANİYE', 'ÇEKMEKÖY'], dtype=object)

In [290]:
metrolar = metrolar[['İLÇE', 'TÜR']].groupby(['İLÇE', 'TÜR']).size().reset_index(name='count')
metrolar.head(3)

Unnamed: 0,İLÇE,TÜR,count
0,BAĞCILAR,METRO ÇIKIŞI,6
1,BAŞAKŞEHİR,METRO ÇIKIŞI,3
2,BEŞİKTAŞ,METRO ÇIKIŞI,2


In [291]:
park_metro_df = pd.concat([parklar, metrolar], axis=0)
park_metro_df.head(3)

Unnamed: 0,İLÇE,TÜR,count
0,ARNAVUTKÖY,PARK,1
1,ATAŞEHİR,PARK,4
2,AVCILAR,PARK,4


In [292]:
park_metro_df['İLÇE'] = park_metro_df['İLÇE'].str.upper()
park_metro_df['İLÇE'] = park_metro_df['İLÇE'].apply(lambda x: ''.join([turkish_cases.get(i, i) for i in x]))
park_metro_df.head(3)

Unnamed: 0,İLÇE,TÜR,count
0,ARNAVUTKOY,PARK,1
1,ATASEHIR,PARK,4
2,AVCILAR,PARK,4


In [293]:
park_metro_df.columns = ['ilce', 'tur', 'tur_count']

In [294]:
database = pd.merge(database, park_metro_df, on=['ilce'], how='left')
database.head(3)

Unnamed: 0,ilce,mahalle,yardim_alan_hane_sayisi,ses_klasmanı,ses_skoru,tur,tur_count
0,ADALAR,BURGAZADA,5,B,62.5,,
1,ADALAR,HEYBELIADA,22,B,62.5,,
2,ADALAR,KINALIADA,13,C,37.5,,


In [295]:
database.sort_values('tur_count', ascending=False).head(3)

Unnamed: 0,ilce,mahalle,yardim_alan_hane_sayisi,ses_klasmanı,ses_skoru,tur,tur_count
243,BEYKOZ,FATIH,39,D,25.0,PARK,29.0
253,BEYKOZ,KAYNARCA,10,C,37.5,PARK,29.0
247,BEYKOZ,GOZTEPE,15,A,87.5,PARK,29.0


In [296]:
nufus_maaliyet = pd.read_csv('../Datasets/PROCESSED/nufus_maaliyet.csv')
nufus_maaliyet.head(3)

Unnamed: 0,ilce,mahalle,nufus_yogunlugu,nufus,yuzolcumu,maliyet
0,ARNAVUTKOY,ANADOLU,21607.0,33177,1.5,201000
1,ARNAVUTKOY,YUNUS EMRE,14919.0,16085,1.1,201000
2,ARNAVUTKOY,BOGAZKOY ATATURK,11965.0,7726,0.65,201000


In [297]:
database['mahalle'] = database['mahalle'].str.replace(' ', '')
database.head(10)

Unnamed: 0,ilce,mahalle,yardim_alan_hane_sayisi,ses_klasmanı,ses_skoru,tur,tur_count
0,ADALAR,BURGAZADA,5,B,62.5,,
1,ADALAR,HEYBELIADA,22,B,62.5,,
2,ADALAR,KINALIADA,13,C,37.5,,
3,ADALAR,MADEN,27,B,62.5,,
4,ADALAR,NIZAM,20,B+,75.0,,
5,ARNAVUTKOY,ADNANMENDERES,127,E,12.5,PARK,1.0
6,ARNAVUTKOY,ANADOLU,1360,E,12.5,PARK,1.0
7,ARNAVUTKOY,ARNAVUTKOYMERKEZ,309,D,25.0,PARK,1.0
8,ARNAVUTKOY,ATATURK,420,E,12.5,PARK,1.0
9,ARNAVUTKOY,BAKLALI,8,E,12.5,PARK,1.0


In [298]:
nufus_maaliyet['mahalle'] = nufus_maaliyet['mahalle'].str.replace(' ', '')
nufus_maaliyet.head(3)

Unnamed: 0,ilce,mahalle,nufus_yogunlugu,nufus,yuzolcumu,maliyet
0,ARNAVUTKOY,ANADOLU,21607.0,33177,1.5,201000
1,ARNAVUTKOY,YUNUSEMRE,14919.0,16085,1.1,201000
2,ARNAVUTKOY,BOGAZKOYATATURK,11965.0,7726,0.65,201000


In [299]:
database = pd.merge(database, nufus_maaliyet, on=['ilce', 'mahalle'], how='inner')

In [300]:
database.sort_values('maliyet', ascending=False).head(3)

Unnamed: 0,ilce,mahalle,yardim_alan_hane_sayisi,ses_klasmanı,ses_skoru,tur,tur_count,nufus_yogunlugu,nufus,yuzolcumu,maliyet
846,SARIYER,YENIKOY,37,A,87.5,PARK,20.0,6926.0,15481,2.2,819000
819,SARIYER,KISIRKAYA,3,E,12.5,METRO ÇIKIŞI,1.0,33.3,333,10.0,819000
827,SARIYER,MASLAK,4,A,87.5,METRO ÇIKIŞI,1.0,1328.0,8091,6.1,819000
