## 地铁站评分标准

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

criterion1 = pd.read_csv('../../data/district.csv')

bins = 15
# 使用retbins=True来获取分割点
criterion1['subway_category'], bins_edges1 = pd.cut(criterion1['subway'], bins=bins, labels=range(bins), retbins=True)

average_by_subway_category = criterion1.groupby('subway_category', observed=True)['mall_cnt'].mean().reset_index()

# 生成每个category的范围字符串
ranges = [f"{bins_edges1[i]}-{bins_edges1[i + 1]}" for i in range(len(bins_edges1) - 1)]

# 将范围字符串添加到DataFrame中
average_by_subway_category['subway_range'] = ranges

print(average_by_subway_category)

   subway_category  mall_cnt                             subway_range
0                0  1.285714               -0.001-0.06666666666666667
1                1  1.000000  0.06666666666666667-0.13333333333333333
2                2  2.500000                  0.13333333333333333-0.2
3                3  1.000000                  0.2-0.26666666666666666
4                4  3.222222   0.26666666666666666-0.3333333333333333
5                5  2.666667                   0.3333333333333333-0.4
6                6  4.857143                   0.4-0.4666666666666667
7                7  4.555556    0.4666666666666667-0.5333333333333333
8                8  5.500000                   0.5333333333333333-0.6
9                9  6.571429                   0.6-0.6666666666666666
10              10  3.166667    0.6666666666666666-0.7333333333333333
11              11  2.900000                   0.7333333333333333-0.8
12              12  2.500000                   0.8-0.8666666666666667
13              13  

## Population 评分标准

In [3]:
import numpy as np

criterion2 = pd.read_csv('../../data/district.csv')

bins = 26
max_value = criterion1['house_cnt'].max()
bin_width = max_value / bins

# 使用numpy生成区间边界
bins_edges2 = np.arange(0, max_value + bin_width, bin_width)

# 使用pd.cut来分配每个house_cnt到对应的区间
criterion2['house_cnt_category'], _ = pd.cut(criterion2['house_cnt'], bins=bins_edges2, labels=range(bins),
                                             retbins=True)

# 按照house_cnt_category分组并计算每个组的mall_cnt平均值
average_by_house_cnt_category = criterion2.groupby('house_cnt_category', observed=True)['mall_cnt'].mean().reset_index()

# 生成每个category的范围字符串
ranges = [f"{bins_edges2[i]}-{bins_edges2[i + 1]}" for i in range(len(bins_edges2) - 1)]

# 由于最后一个区间可能没有数据，我们需要确保ranges的长度与average_by_house_cnt_category的长度一致
average_by_house_cnt_category['house_cnt_range'] = ranges[:len(average_by_house_cnt_category)]

print(average_by_house_cnt_category)

   house_cnt_category   mall_cnt                        house_cnt_range
0                   0   1.580645                  0.0-675.8461538461538
1                   1   2.233333   675.8461538461538-1351.6923076923076
2                   2   2.296296  1351.6923076923076-2027.5384615384614
3                   3   3.538462  2027.5384615384614-2703.3846153846152
4                   4   2.125000   2703.3846153846152-3379.230769230769
5                   5   7.500000    3379.230769230769-4055.076923076923
6                   6   6.200000    4055.076923076923-4730.923076923076
7                   8   7.000000   4730.923076923076-5406.7692307692305
8                   9   5.000000   5406.7692307692305-6082.615384615385
9                  10  16.000000    6082.615384615385-6758.461538461538
10                 12   4.000000   6758.461538461538-7434.3076923076915
11                 15   3.000000   7434.3076923076915-8110.153846153846
12                 16   3.000000               8110.153846153846

## 房型评分标准

In [4]:
criterion3 = pd.read_csv('../../data/district.csv')

# 计算mall_cnt与每个buildingType相乘的结果
product_towel = criterion3['mall_cnt'] * criterion3['buildingType_towel']
product_bungalow = criterion3['mall_cnt'] * criterion3['buildingType_bungalow']
product_comb = criterion3['mall_cnt'] * criterion3['buildingType_comb']
product_plate = criterion3['mall_cnt'] * criterion3['buildingType_plate']

# 计算每个乘积的平均值
average_towel = product_towel.sum() / criterion3['buildingType_towel'].sum()
average_bungalow = product_bungalow.sum() / criterion3['buildingType_bungalow'].sum()
average_comb = product_comb.sum() / criterion3['buildingType_comb'].sum()
average_plate = product_plate.sum() / criterion3['buildingType_plate'].sum()

# 打印结果
print(f"Average for Towel: {average_towel}")
print(f"Average for Bungalow: {average_bungalow}")
print(f"Average for Comb: {average_comb}")
print(f"Average for Plate: {average_plate}")


Average for Towel: 3.289457356934159
Average for Bungalow: 1.6804502032785749
Average for Comb: 3.0704818559531755
Average for Plate: 2.476455712051342


## 最终分数判定

In [5]:
import pandas as pd

# 加载数据
Ans = pd.read_csv('../../data/district.csv')

# 创建一个空的DataFrame来存储结果
results = pd.DataFrame(columns=['belonging', 'point1', 'point2', 'point3', 'Final_point'])

# 创建一个空列表来收集每行的数据
rows_list = []
i = 0
for index, row in Ans.iterrows():
    i = i + 1
    belonging = row['belonging']
    subway_value = row['subway']
    house_cnt_value = row['house_cnt']
    bungalow = row['buildingType_bungalow']
    comb = row['buildingType_comb']
    plate = row['buildingType_plate']
    towel = row['buildingType_towel']
    mall_cnt = row['mall_cnt']
    point1 = 0
    point2 = 0
    for index_subway, row_subway in average_by_subway_category.iterrows():
        if subway_value >= bins_edges1[index_subway] and subway_value < bins_edges1[index_subway + 1]:
            point1 = row_subway['mall_cnt']
            break
    for index_house_cnt, row_house_cnt in average_by_house_cnt_category.iterrows():
        if house_cnt_value >= bins_edges2[index_house_cnt] and house_cnt_value < bins_edges2[index_house_cnt + 1]:
            point2 = row_house_cnt['mall_cnt']
            break

    point3 = bungalow * average_bungalow + comb * average_comb + plate * average_plate + towel * average_towel
    Final_point = (point1 + point2 + point3) / 1.0 * max(mall_cnt, 1)
    #print(Final_point)
    rows_list.append({
        'belonging': int(belonging),
        'point1': point1,
        'point2': point2,
        'point3': point3,
        'Final_point': Final_point
    })

# 使用pandas.concat将所有行合并到results DataFrame中
results = pd.concat([results, pd.DataFrame(rows_list)], ignore_index=True)

# 创建一个包含0到327的belonging的DataFrame
all_belongings = pd.DataFrame({'belonging': range(0, 328)})

# 合并两个DataFrame，确保所有belonging都存在
results_full = pd.merge(all_belongings, results, on='belonging', how='left').fillna(0)

# 确保所有列除了belonging外，其他的都是整数类型
results_full[['point1', 'point2', 'point3', 'Final_point']] = results_full[
    ['point1', 'point2', 'point3', 'Final_point']].astype(float)

# 导出结果到CSV文件
results_full.to_csv('../../data/Evaluation_result_full.csv', index=False)