In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (8,8)
# import geopandas as gpd
import seaborn
import matplotlib as mpl
# from shapely.geometry import Point,LineString,LinearRing,Polygon
# from shapely.geometry import MultiPoint,MultiLineString,MultiPolygon
import os
%matplotlib inline

# 读取数据, 做基本处理

In [10]:
# Load the PlacePulse pairwise comparison votes.
# NOTE(review): both read paths in this cell are absolute and machine-specific.
votes = pd.read_csv(
    r"E:\202203PlacePulse\1.PlacePulse2.0dataset\place-pulse-2.0\votes.tsv",
    sep="\t",
)

# Keep only the rows whose choice is one of the three valid values.
votes = votes.loc[votes['choice'].isin(['left', 'equal', 'right'])]

# Load the studies table, which maps each study id to the perception it asks
# about (e.g. 50a68a51fdc9f05596000002 -> safer).
studies = pd.read_csv(
    r"E:\202203PlacePulse\1.PlacePulse2.0dataset\place-pulse-2.0\studies.tsv",
    sep="\t",
)

# Attach the perception to every vote, then keep only the columns used later.
df = votes.merge(studies, left_on='study_id', right_on='_id')
df = df[['choice', 'left', 'right', 'study_question']]
df

Unnamed: 0,choice,left,right,study_question
0,left,513d2dbefdc9f03587002515,51413b2ffdc9f049260058ca,wealthier
1,left,514145e8fdc9f049260066b8,50f5ed65fdc9f065f0008ce9,wealthier
2,right,513da066fdc9f0358700897b,50f5e70cfdc9f065f000702c,wealthier
3,left,5141355bfdc9f049260049a4,5141379dfdc9f04926004faa,wealthier
4,left,50f5eb17fdc9f065f0007f52,513cbcb8fdc9f03587001005,wealthier
...,...,...,...,...
1555533,left,51409a80fdc9f04926000f5f,50f60867beb2fed6f80003f7,more beautiful
1555534,left,50f42eaefdc9f065f000231b,513d6975fdc9f0358700472e,more beautiful
1555535,right,50f42c19fdc9f065f0001877,51421a8bfdc9f04926008368,more beautiful
1555536,left,50f61674beb2fed9100008a6,513da021fdc9f035870088de,more beautiful


# 计算placepulse中每个感知的分数

In [17]:
# Show which distinct perceptions occur in the merged votes.
pd.unique(df['study_question'])

array(['wealthier', 'more depressing', 'safer', 'livelier', 'more boring',
       'more beautiful'], dtype=object)

In [19]:
# Folder that receives one "<perception>.csv" score table per study question.
SCORE_DIR = './data/1.训练集感知分数'


def _side_tallies(votes, image_col, win_choice, lose_choice):
    """Tally the votes of every id that appears on one side of a comparison.

    Parameters
    ----------
    votes : pandas.DataFrame
        Must contain a 'choice' column and the column named by ``image_col``.
    image_col : str
        Column whose ids are grouped ('left' or 'right').
    win_choice, lose_choice : str
        Value of 'choice' that counts as a win / a loss for that side.

    Returns
    -------
    pandas.DataFrame
        Indexed by id, with integer columns 'wins', 'losses' and 'total'.
        'total' counts every vote of the id, including votes that are neither
        a win nor a loss (e.g. 'equal').
    """
    flags = pd.DataFrame({
        image_col: votes[image_col],
        # Cast to int so the group-wise sum is a plain count.
        'wins': (votes['choice'] == win_choice).astype('int64'),
        'losses': (votes['choice'] == lose_choice).astype('int64'),
        'total': 1,
    })
    return flags.groupby(image_col).sum()


def compute_perception_scores(votes):
    """Compute one score per id from the votes of a single perception.

    For every id, with P = wins / total and N = losses / total taken over all
    comparisons it took part in (on either side):

        score = 10/3 * (2*P - N + 1)

    which is 10 when every vote is a win and 0 when every vote is a loss.

    Parameters
    ----------
    votes : pandas.DataFrame
        Columns 'choice', 'left' and 'right'; 'choice' == 'left' means the
        left id won, 'choice' == 'right' means the right id won.

    Returns
    -------
    pandas.DataFrame
        Two columns: 'index' (the id) and 'score'.
    """
    # Outer-align the two per-side tallies on id; an id that never appeared on
    # one side simply contributes zeros for that side.
    tallies = pd.concat(
        [
            _side_tallies(votes, 'left', 'left', 'right'),
            _side_tallies(votes, 'right', 'right', 'left'),
        ],
        axis=1,
        keys=['shown_left', 'shown_right'],
    ).fillna(0)
    combined = tallies['shown_left'] + tallies['shown_right']

    # Every id in the union has at least one vote, so 'total' is never zero.
    win_rate = combined['wins'] / combined['total']
    loss_rate = combined['losses'] / combined['total']
    score = (10 / 3) * (2 * win_rate - loss_rate + 1)

    # Name the id column 'index' explicitly: the per-perception CSV files are
    # read back with set_index('index').
    return score.rename('score').rename_axis('index').reset_index()


# to_csv does not create missing folders.
os.makedirs(SCORE_DIR, exist_ok=True)

for _type in df['study_question'].unique():
    perception = df[df['study_question'] == _type]
    result_final = compute_perception_scores(perception)
    result_final.to_csv(f'{SCORE_DIR}/{_type}.csv', index=False)

# 读取储存的数据,并做合并

In [38]:
# Load the saved score table of a single perception.
def concat_dt(_type):
    """Read ``./data/1.训练集感知分数/<_type>.csv`` as a one-column table.

    The file's 'index' column becomes the row index and the single remaining
    column is renamed to ``_type``.
    """
    csv_path = f'./data/1.训练集感知分数/{_type}.csv'
    scores = pd.read_csv(csv_path).set_index('index')
    scores.columns = [_type]
    return scores

# Load one score column per perception and place them side by side,
# aligned on the row index.
perception_names = df['study_question'].unique()
score_tables = [concat_dt(name) for name in perception_names]

# Keep only the rows that have a score for every perception.
data_all = pd.concat(score_tables, axis=1).dropna()

# Save the combined table.
data_all.to_csv('./data/1.训练集感知分数/data_all.csv')