In [None]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import os
import pandas as pd
import numpy as np

In [None]:
def extract_features(image_path, model, preprocess):
    """Run a single image through ``model`` and return its feature tensor.

    Args:
        image_path: Path of the image file to load.
        model: Callable network applied to a batch of one pre-processed image.
        preprocess: Callable that turns a PIL image into a tensor.

    Returns:
        The model output with all size-1 dimensions squeezed away. For the
        truncated ResNet-50 built in this notebook this is presumably a flat
        feature vector -- TODO confirm the exact length.

    Raises:
        OSError: if the file cannot be opened or decoded by PIL.
    """
    # Open inside a context manager so the file handle is released as soon as
    # the pixels are read; this matters when looping over thousands of files.
    with Image.open(image_path) as img:
        # Force three channels: the normalisation used in this notebook has
        # 3-element mean/std, so grayscale, palette or RGBA images would
        # otherwise fail (or be normalised incorrectly).
        img_t = preprocess(img.convert('RGB'))

    # The model expects a batch, so add a leading batch dimension of size 1.
    img_t = img_t.unsqueeze(0)

    # Inference only: skip gradient tracking to save memory and time.
    with torch.no_grad():
        features = model(img_t)

    # Drop the batch dimension and any other singleton dimensions.
    features = features.squeeze()

    return features

In [None]:
# Load ResNet-50 with the original ImageNet weights. The explicit weights enum
# replaces the deprecated `pretrained=True` flag and selects the same V1
# checkpoint, so newly extracted features stay comparable with older ones.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
# Rebuild the network from all child modules except the last one, so the
# model outputs features instead of class scores.
model = torch.nn.Sequential(*(list(model.children())[:-1]))
# Switch to inference mode (BatchNorm layers use their running statistics).
# As the last expression of the cell this also displays the architecture.
model.eval()



Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [None]:
# Input pipeline applied to every PIL image before it is fed to the network.
preprocess = transforms.Compose(
    [
        # Resize, then keep the central 224-pixel crop.
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        # Convert the PIL image to a tensor.
        transforms.ToTensor(),
        # Per-channel standardisation with three-channel mean / std values
        # (the commonly used ImageNet statistics).
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


In [None]:
from google.colab import drive
import os

# Make Google Drive available under /content/drive.
drive.mount('/content/drive')

# Path of the folder that holds the images.
folder_path = '/content/drive/My Drive/Colab Notebooks/Data/PaipaSVI/add2/'
files = []

if os.path.exists(folder_path):
    # Walk the directory with os.scandir(), collecting the name of every
    # regular file it yields.
    try:
        with os.scandir(folder_path) as entries:
            for item in entries:
                if not item.is_file():
                    continue
                files.append(item.name)
                # Report progress once per 1000 collected files.
                if len(files) % 1000 == 0:
                    print(f"已处理{len(files)}个文件...")
    except OSError as e:
        print(f"遇到错误：{e}")
else:
    print(f"指定的路径不存在：{folder_path}")

# Finally, print the total number of files found as a sanity check.
print(f"总共找到了{len(files)}个文件。")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
已处理1000个文件...
已处理2000个文件...
已处理3000个文件...
已处理4000个文件...
总共找到了4345个文件。


In [None]:
# Extensions treated as images. Matching is case-insensitive and anchored on
# the real extension, unlike the previous "last character is 'g'" check, which
# also accepted names such as "*.log" or "*.svg" and skipped "*.JPG".
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# Maps image file name -> feature tensor returned by extract_features().
features_dict = {}
# Number of images processed so far; used only for progress reporting.
i = 0
for file in files:
    if not file.lower().endswith(IMAGE_EXTENSIONS):
        continue
    img_path = os.path.join(folder_path, file)
    features = extract_features(img_path, model, preprocess)
    features_dict[file] = features
    i += 1
    # Print a progress line every 100 images.
    if i % 100 == 0:
        print(f'{i} files has been extracted')

100 files has been extracted
200 files has been extracted
300 files has been extracted
400 files has been extracted
500 files has been extracted
600 files has been extracted
700 files has been extracted
800 files has been extracted
900 files has been extracted
1000 files has been extracted
1100 files has been extracted
1200 files has been extracted
1300 files has been extracted
1400 files has been extracted
1500 files has been extracted
1600 files has been extracted
1700 files has been extracted
1800 files has been extracted
1900 files has been extracted
2000 files has been extracted
2100 files has been extracted
2200 files has been extracted
2300 files has been extracted
2400 files has been extracted
2500 files has been extracted
2600 files has been extracted
2700 files has been extracted
2800 files has been extracted
2900 files has been extracted
3000 files has been extracted
3100 files has been extracted
3200 files has been extracted
3300 files has been extracted
3400 files has been

In [None]:
# Collect one (image_id, feature-list) record per extracted image; each
# feature value is turned into a plain Python list via .tolist().
data_list = []
for image_name, feature_vec in features_dict.items():
    data_list.append((image_name, feature_vec.tolist()))

# Two-column table: the image file name and its feature list.
df_features = pd.DataFrame(data=data_list, columns=['image_id', 'features'])

# Persist to Drive as CSV, leaving out the row index.
df_features.to_csv(
    '/content/drive/My Drive/Colab Notebooks/Data/cll_final_add2.csv',
    index=False,
)

In [None]:
# Load the three feature tables from Drive. The first two files are not
# written anywhere in this notebook -- presumably they come from earlier
# extraction runs; verify before relying on them.
df1 = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Data/cll_final.csv')
df2 = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Data/cll_final_add.csv')
df3 = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Data/cll_final_add2.csv')

# Stack the tables row-wise and renumber the rows 0..n-1 in a single step.
df_fi = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
df_fi

Unnamed: 0,image_id,features
0,5205.0_panorama.jpg,"[0.266784131526947, 1.2442797422409058, 0.3355..."
1,5206.0_panorama.jpg,"[0.6613556742668152, 0.7590034604072571, 0.288..."
2,5207.0_panorama.jpg,"[0.5054644346237183, 0.9782446026802063, 0.316..."
3,5208.0_panorama.jpg,"[0.6890900135040283, 1.1312485933303833, 0.525..."
4,5209.0_panorama.jpg,"[0.4193248450756073, 0.8977274894714355, 0.299..."
...,...,...
16969,16954.0_panorama.jpg,"[0.3880627751350403, 1.5375115871429443, 0.160..."
16970,16955.0_panorama.jpg,"[0.2674499750137329, 1.420997142791748, 0.0758..."
16971,16972.0_panorama.jpg,"[0.13584919273853302, 0.5921735763549805, 0.03..."
16972,16970.0_panorama.jpg,"[0.2558000385761261, 1.0976775884628296, 0.153..."


In [None]:
# Load the point table that is later merged with the features on 'OID_'.
# The displayed output shows the columns OID_, POINT_X and POINT_Y.
df_filter = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Data/PaipaSVI/points.csv')
df_filter

Unnamed: 0,OID_,POINT_X,POINT_Y
0,1,-73.117803,5.783468
1,2,-73.117801,5.783558
2,3,-73.117800,5.783648
3,4,-73.117798,5.783737
4,5,-73.117796,5.783827
...,...,...,...
16970,16971,-73.108744,5.773042
16971,16972,-73.108701,5.772968
16972,16973,-73.108658,5.772895
16973,16974,-73.108615,5.772821


In [None]:
# Give the image-id column the same name as the key column of the point
# table ('OID_') so the two tables can be merged on it.
df_fi = df_fi.rename({'image_id': 'OID_'}, axis='columns')
df_fi

Unnamed: 0,OID_,features
0,5205.0_panorama.jpg,"[0.266784131526947, 1.2442797422409058, 0.3355..."
1,5206.0_panorama.jpg,"[0.6613556742668152, 0.7590034604072571, 0.288..."
2,5207.0_panorama.jpg,"[0.5054644346237183, 0.9782446026802063, 0.316..."
3,5208.0_panorama.jpg,"[0.6890900135040283, 1.1312485933303833, 0.525..."
4,5209.0_panorama.jpg,"[0.4193248450756073, 0.8977274894714355, 0.299..."
...,...,...
16969,16954.0_panorama.jpg,"[0.3880627751350403, 1.5375115871429443, 0.160..."
16970,16955.0_panorama.jpg,"[0.2674499750137329, 1.420997142791748, 0.0758..."
16971,16972.0_panorama.jpg,"[0.13584919273853302, 0.5921735763549805, 0.03..."
16972,16970.0_panorama.jpg,"[0.2558000385761261, 1.0976775884628296, 0.153..."


In [None]:
# Turn file names such as '5205.0_panorama.jpg' into the integer id 5205 by
# keeping the text before the first '.' and casting it to int.
# This is vectorised instead of a row-by-row iterrows() loop, and it yields a
# real integer column. The astype(str) step makes the cell safe to re-run:
# values that are already integers are converted again instead of raising on
# a missing .split().
df_fi['OID_'] = (
    df_fi['OID_']
    .astype(str)
    .str.split('.', n=1)
    .str[0]
    .astype(int)
)
df_fi

Unnamed: 0,OID_,features
0,5205,"[0.266784131526947, 1.2442797422409058, 0.3355..."
1,5206,"[0.6613556742668152, 0.7590034604072571, 0.288..."
2,5207,"[0.5054644346237183, 0.9782446026802063, 0.316..."
3,5208,"[0.6890900135040283, 1.1312485933303833, 0.525..."
4,5209,"[0.4193248450756073, 0.8977274894714355, 0.299..."
...,...,...
16969,16954,"[0.3880627751350403, 1.5375115871429443, 0.160..."
16970,16955,"[0.2674499750137329, 1.420997142791748, 0.0758..."
16971,16972,"[0.13584919273853302, 0.5921735763549805, 0.03..."
16972,16970,"[0.2558000385761261, 1.0976775884628296, 0.153..."


In [None]:
# Attach the point columns to each feature row. This is an inner join on
# 'OID_', so rows without a match in the other table are dropped.
df = df_fi.merge(df_filter, how='inner', on='OID_')
df

Unnamed: 0,OID_,features,POINT_X,POINT_Y
0,5205,"[0.266784131526947, 1.2442797422409058, 0.3355...",-73.110276,5.778463
1,5206,"[0.6613556742668152, 0.7590034604072571, 0.288...",-73.110219,5.778415
2,5207,"[0.5054644346237183, 0.9782446026802063, 0.316...",-73.110133,5.778435
3,5208,"[0.6890900135040283, 1.1312485933303833, 0.525...",-73.110046,5.778456
4,5209,"[0.4193248450756073, 0.8977274894714355, 0.299...",-73.109960,5.778477
...,...,...,...,...
16969,16954,"[0.3880627751350403, 1.5375115871429443, 0.160...",-73.109614,5.774251
16970,16955,"[0.2674499750137329, 1.420997142791748, 0.0758...",-73.109558,5.774182
16971,16972,"[0.13584919273853302, 0.5921735763549805, 0.03...",-73.108701,5.772968
16972,16970,"[0.2558000385761261, 1.0976775884628296, 0.153...",-73.108787,5.773115


In [None]:
# Keep only the rows whose POINT_Y is at most 5.81.
# NOTE(review): POINT_Y looks like a latitude and 5.81 like a study-area
# cutoff -- confirm the meaning of this threshold.
df_filtered = df.loc[df['POINT_Y'].le(5.81)]
df_filtered

Unnamed: 0,OID_,features,POINT_X,POINT_Y
0,5205,"[0.266784131526947, 1.2442797422409058, 0.3355...",-73.110276,5.778463
1,5206,"[0.6613556742668152, 0.7590034604072571, 0.288...",-73.110219,5.778415
2,5207,"[0.5054644346237183, 0.9782446026802063, 0.316...",-73.110133,5.778435
3,5208,"[0.6890900135040283, 1.1312485933303833, 0.525...",-73.110046,5.778456
4,5209,"[0.4193248450756073, 0.8977274894714355, 0.299...",-73.109960,5.778477
...,...,...,...,...
16969,16954,"[0.3880627751350403, 1.5375115871429443, 0.160...",-73.109614,5.774251
16970,16955,"[0.2674499750137329, 1.420997142791748, 0.0758...",-73.109558,5.774182
16971,16972,"[0.13584919273853302, 0.5921735763549805, 0.03...",-73.108701,5.772968
16972,16970,"[0.2558000385761261, 1.0976775884628296, 0.153...",-73.108787,5.773115


In [None]:
# Save the filtered table to Drive. index=False matches the earlier feature
# export in this notebook and keeps the row index (non-contiguous after
# filtering) from being written as an extra unnamed column.
df_filtered.to_csv(
    '/content/drive/My Drive/Colab Notebooks/Data/paipa_features.csv',
    index=False,
)