In [83]:
# Import pandas, the library used throughout this notebook for tabular data.
import pandas as pd

In [84]:
# Build a plain Python list holding the integers 1 through 5.
my_list = list(range(1, 6))

In [85]:
# Print the numbers 5, 4, 3, 2, 1, one per line.
# Walking range(1, 6) backwards visits the same values as range(5, 0, -1),
# i.e. a range with a negative step that stops before reaching 0.
for i in reversed(range(1, 6)):
    print(i)

5
4
3
2
1


In [86]:
# Produce a reversed copy of my_list, leaving my_list itself untouched.
# list(reversed(...)) builds the same new list as the slice my_list[::-1].
list(reversed(my_list))

[5, 4, 3, 2, 1]

In [87]:
# Read only_last.csv into the DataFrame df.
# The file separates its columns with semicolons, so the delimiter is set explicitly.
df = pd.read_csv(
    'only_last.csv',
    delimiter=';',
)

In [88]:
# Peek at the first five rows to get a quick feel for the columns and their values.
df.head(n=5)

Unnamed: 0,address,adm_district,city_district,lattitude,longitude
0,"город Москва, улица Егора Абакумова, дом 9",Северо-восточный,Ярославский,55.878996,37.714462
1,"город Москва, улица Талалихина, дом 2/1, корпус 1",Центральный,Таганский,55.738298,37.673337
2,"город Москва, Абельмановская улица, дом 6",Центральный,Таганский,55.735528,37.669516
3,"город Москва, Абрамцевская улица, дом 1",Северо-восточный,Лианозово,55.892653,37.573036
4,"город Москва, Абрамцевская улица, дом 9, корпус 1",Северо-восточный,Лианозово,55.904126,37.572181


In [89]:
# List the distinct values of the adm_district column (the administrative okrugs).
# unique() reports NaN as a value too, so missing okrugs show up in the result.
df['adm_district'].unique()

array(['Северо-восточный', 'Центральный', 'Юго-восточный', 'Западный',
       'Юго-западный', 'Северный', 'Восточный', 'Южный',
       'Северо-западный', 'Зеленоградский', 'Троицкий', 'Новомосковский',
       nan], dtype=object)

In [90]:
# Show the DataFrame dimensions as (rows, columns); the recorded output is (15366, 5).
df.shape

(15366, 5)

In [91]:
# Count the distinct addresses. The recorded result (9085) is smaller than the
# number of rows (15366), so some addresses occur more than once.
df['address'].nunique()

9085

In [92]:
# Average the coordinates within every (adm_district, city_district) pair,
# giving one mean lattitude/longitude point per district.
# reset_index() moves the two grouping keys out of the index back into ordinary columns.
grouped_df = (
    df.groupby(['adm_district', 'city_district'])[['lattitude', 'longitude']]
    .mean()
    .reset_index()
)

In [93]:
# Reshape grouped_df into a wide table: one row per adm_district, one column per
# city_district, each cell holding that district's mean lattitude.
# (okrug, district) combinations that do not occur are filled with 0, so a 0 here
# means "no such combination", not a real coordinate.
pivot_df = (
    grouped_df
    .pivot(index='adm_district', columns='city_district', values='lattitude')
    .fillna(value=0)
)

In [94]:
# Look at the first five rows of the pivot table. Most cells are 0 because absent
# (okrug, district) combinations were filled with 0 when pivot_df was built.
pivot_df.head(n=5)

city_district,Академический,Алексеевский,Алтуфьевский,Арбат,Аэропорт,Бабушкинский,Басманный,Беговой,Бескудниковский,Бибирево,...,Чертаново Центральное,Чертаново Южное,Щукино,Южное Бутово,Южное Медведково,Южное Тушино,Южнопортовый,Якиманка,Ярославский,Ясенево
adm_district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Восточный,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Западный,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Зеленоградский,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Новомосковский,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Северный,0.0,0.0,0.0,0.0,55.802004,0.0,0.0,55.784528,55.867772,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [95]:
# Order the pivot table by the 'Арбат' column, largest value first, and keep ten rows.
# The only non-zero entry rises to the top, revealing which okrug contains Арбат
# (Центральный in the recorded output).
pivot_df.sort_values('Арбат', ascending=False).iloc[:10]

city_district,Академический,Алексеевский,Алтуфьевский,Арбат,Аэропорт,Бабушкинский,Басманный,Беговой,Бескудниковский,Бибирево,...,Чертаново Центральное,Чертаново Южное,Щукино,Южное Бутово,Южное Медведково,Южное Тушино,Южнопортовый,Якиманка,Ярославский,Ясенево
adm_district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Центральный,0.0,0.0,0.0,55.751137,0.0,0.0,55.763301,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,55.733163,0.0,0.0
Восточный,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Зеленоградский,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Западный,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Новомосковский,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Северный,0.0,0.0,0.0,0.0,55.802004,0.0,0.0,55.784528,55.867772,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Северо-западный,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,55.800362,0.0,0.0,55.84477,0.0,0.0,0.0,0.0
Северо-восточный,0.0,55.814521,55.879761,0.0,0.0,55.868705,0.0,0.0,0.0,55.891729,...,0.0,0.0,0.0,0.0,55.872609,0.0,0.0,0.0,55.865552,0.0
Троицкий,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Юго-восточный,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,55.714484,0.0,0.0,0.0


In [96]:
# Rank the okrugs by the 'Арбат' column alone (descending) and show the top ten rows;
# restricting the frame to that single column keeps the output compact.
(
    pivot_df.loc[:, ['Арбат']]
    .sort_values(by='Арбат', ascending=False)
    .head(10)
)

city_district,Арбат
adm_district,Unnamed: 1_level_1
Центральный,55.751137
Восточный,0.0
Зеленоградский,0.0
Западный,0.0
Новомосковский,0.0
Северный,0.0
Северо-западный,0.0
Северо-восточный,0.0
Троицкий,0.0
Юго-восточный,0.0


In [97]:
# Emit the single-column 'Арбат' ranking through print(), which yields the
# plain-text rendering of the frame.
print(
    pivot_df[['Арбат']]
    .sort_values(by='Арбат', ascending=False)
    .head(10)
)

city_district         Арбат
adm_district               
Центральный       55.751137
Восточный          0.000000
Зеленоградский     0.000000
Западный           0.000000
Новомосковский     0.000000
Северный           0.000000
Северо-западный    0.000000
Северо-восточный   0.000000
Троицкий           0.000000
Юго-восточный      0.000000


In [98]:
# Keep only the rows whose city_district is 'Арбат', then average the coordinates
# per (adm_district, city_district) pair and sort by mean lattitude, largest first.
# The recorded output has a single row, placing Арбат in the Центральный okrug.
(
    df.loc[df['city_district'] == 'Арбат']
    .groupby(['adm_district', 'city_district'])[['lattitude', 'longitude']]
    .mean()
    .sort_values(by='lattitude', ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,lattitude,longitude
adm_district,city_district,Unnamed: 2_level_1,Unnamed: 3_level_1
Центральный,Арбат,55.751137,37.591079


In [99]:
# Mean lattitude/longitude for every administrative okrug except 'Центральный'.
# Rows of that okrug are filtered out first; the rest are grouped by adm_district.
(
    df.loc[df['adm_district'] != 'Центральный']
    .groupby('adm_district')[['lattitude', 'longitude']]
    .mean()
)


Unnamed: 0_level_0,lattitude,longitude
adm_district,Unnamed: 1_level_1,Unnamed: 2_level_1
Восточный,55.780355,37.769285
Западный,55.707884,37.469496
Зеленоградский,55.990076,37.193956
Новомосковский,55.59438,37.449411
Северный,55.821062,37.530658
Северо-восточный,55.846583,37.625346
Северо-западный,55.827638,37.426582
Троицкий,55.441492,37.272268
Юго-восточный,55.698667,37.748287
Юго-западный,55.642232,37.548271


In [100]:
# Group by district.
# Compute the mean lattitude/longitude for each (adm_district, city_district) pair
# directly from df, then turn the grouping keys back into regular columns.
# NOTE(review): the original note reports 127 resulting rows; the truncated output
# recorded below does not show the last row, so confirm the count if it matters.
grouped_df_2 = (
    df.groupby(['adm_district', 'city_district'])
    .agg(
        lattitude=('lattitude', 'mean'),
        longitude=('longitude', 'mean'),
    )
    .reset_index()
)
grouped_df_2

Unnamed: 0,adm_district,city_district,lattitude,longitude
0,Восточный,Богородское,55.814864,37.724862
1,Восточный,Вешняки,55.728474,37.824255
2,Восточный,Восточное Измайлово,55.794768,37.814138
3,Восточный,Восточный,55.864644,37.846768
4,Восточный,Гольяново,55.815373,37.794369
...,...,...,...,...
122,Южный,Орехово-Борисово Южное,55.606420,37.727394
123,Южный,Царицыно,55.629512,37.658308
124,Южный,Чертаново Северное,55.632358,37.607856
125,Южный,Чертаново Центральное,55.612826,37.604631


In [101]:
# Overall mean coordinate across all districts outside the 'Центральный' okrug.
# grouped_df_2 holds one averaged point per district, so this is an unweighted mean
# of district centres: a single common centre for all the remaining districts.
is_not_central = grouped_df_2['adm_district'] != 'Центральный'
filtered_df = grouped_df_2.loc[is_not_central]
mean_values = filtered_df[['lattitude', 'longitude']].mean()
mean_values

lattitude    55.757083
longitude    37.587613
dtype: float64

In [102]:
# Mean coordinate per administrative okrug (adm_district), leaving out 'Центральный'.
# This is an alternative spelling of the earlier per-okrug aggregation over df and
# yields the same table.
(
    df[df['adm_district'].ne('Центральный')]
    .groupby('adm_district')
    .agg({'lattitude': 'mean', 'longitude': 'mean'})
)


Unnamed: 0_level_0,lattitude,longitude
adm_district,Unnamed: 1_level_1,Unnamed: 2_level_1
Восточный,55.780355,37.769285
Западный,55.707884,37.469496
Зеленоградский,55.990076,37.193956
Новомосковский,55.59438,37.449411
Северный,55.821062,37.530658
Северо-восточный,55.846583,37.625346
Северо-западный,55.827638,37.426582
Троицкий,55.441492,37.272268
Юго-восточный,55.698667,37.748287
Юго-западный,55.642232,37.548271
