In [4]:
import pandas as pd
import os

# Load the results table. The file has no header row, so the four column
# names are supplied explicitly.
data_path = 'results.txt'
data = pd.read_csv(
    data_path,
    header=None,
    names=['filename', 'porosity', 'permeability', 'formation_factor'],
)

# Helper that maps an image file name to its path inside a numbered sub-folder
def generate_image_path(filename):
    """Return ``<folder>/<filename>`` for an image named like ``image_<n>.png``.

    The number is read from the second ``_``-separated field of the file name
    (extension removed). The folder is ``n // 100 + 1``, so indices 0-99 map to
    folder ``1``, 100-199 to folder ``2``, and so on.

    Args:
        filename: Image file name, e.g. ``'image_42.png'``.

    Returns:
        The folder index and the unchanged file name joined with the platform
        path separator.

    Raises:
        IndexError: If the name contains no ``_``.
        ValueError: If the second field is not an integer.
    """
    stem, _extension = os.path.splitext(filename)
    number = int(stem.split('_')[1])
    subfolder = str(number // 100 + 1)
    return os.path.join(subfolder, filename)

# The images are assumed to live in numbered sub-folders of the current working
# directory, so the relative path from generate_image_path is used as-is.
# To anchor them elsewhere, prefix each result with a base directory via
# os.path.join(image_base_path, ...).
data['file_path'] = data['filename'].apply(generate_image_path)

# Force the three measurement columns to a numeric dtype; entries that cannot
# be parsed become NaN (errors='coerce').
for column in ('porosity', 'permeability', 'formation_factor'):
    data[column] = pd.to_numeric(data[column], errors='coerce')

# Preview the data frame
print(data.head())


      filename  porosity  permeability  formation_factor      file_path
0  image_0.png    0.7821      8.295445          2.205310  1\image_0.png
1  image_1.png    0.6901      2.168622          3.531883  1\image_1.png
2  image_2.png    0.1971      0.002521        339.209059  1\image_2.png
3  image_3.png    0.3980      0.000852        476.741173  1\image_3.png
4  image_4.png    0.5590      3.153554          4.694168  1\image_4.png


In [5]:
# Keep only rows whose permeability and formation factor are both >= 0.
# Rows where either value is NaN are dropped as well, because a comparison
# against NaN evaluates to False.
non_negative = data['permeability'].ge(0) & data['formation_factor'].ge(0)
filtered_data = data[non_negative]

# Preview the filtered frame and report how many rows survived
print(filtered_data.head())
print(f"原始数据行数: {len(data)}, 过滤后数据行数: {len(filtered_data)}")

      filename  porosity  permeability  formation_factor      file_path
0  image_0.png    0.7821      8.295445          2.205310  1\image_0.png
1  image_1.png    0.6901      2.168622          3.531883  1\image_1.png
2  image_2.png    0.1971      0.002521        339.209059  1\image_2.png
3  image_3.png    0.3980      0.000852        476.741173  1\image_3.png
4  image_4.png    0.5590      3.153554          4.694168  1\image_4.png
原始数据行数: 1000, 过滤后数据行数: 958


In [6]:
# Summary statistics of the numeric columns after filtering
print(filtered_data.describe())

         porosity  permeability  formation_factor
count  958.000000  9.580000e+02      9.580000e+02
mean     0.489896  2.157759e+00      6.554743e+13
std      0.170661  2.861522e+00      9.556576e+14
min      0.175000  4.159036e-15      1.617284e+00
25%      0.346350  3.258744e-02      3.998784e+00
50%      0.490000  9.301814e-01      1.009481e+01
75%      0.632050  3.389625e+00      9.149953e+01
max      0.799100  2.120012e+01      2.492943e+16


In [22]:
from sklearn.preprocessing import StandardScaler

# Standardize the three target columns with StandardScaler, working on a copy
# so that filtered_data itself is left untouched.
# NOTE(review): in the output recorded below, every displayed formation_factor
# value is the same number after scaling, which suggests a few extreme values
# dominate the column; a log transform before scaling may be needed - confirm.
# NOTE(review): the scaler is fitted on all rows, before any train/validation
# split is made - confirm this is intended.
target_columns = ['porosity', 'permeability', 'formation_factor']
scaler = StandardScaler()
standardized_data = filtered_data.copy()
standardized_data[target_columns] = scaler.fit_transform(filtered_data[target_columns])

# Preview the standardized frame
print(standardized_data.head())


      filename  porosity  permeability  formation_factor      file_path
0  image_0.png  1.713082      2.146023         -0.068625  1\image_0.png
1  image_1.png  1.173722      0.003798         -0.068625  1\image_1.png
2  image_2.png -1.716547     -0.753572         -0.068625  1\image_2.png
3  image_3.png -0.538748     -0.754156         -0.068625  1\image_3.png
4  image_4.png  0.405133      0.348177         -0.068625  1\image_4.png


In [29]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader, Dataset

# Select the compute device: CUDA when torch reports it as available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Dataset that pairs each image with its three regression targets
class CustomDataset(Dataset):
    """Serve ``(image, (porosity, permeability, formation_factor))`` samples.

    Each row of ``dataframe`` must provide a ``file_path`` column pointing at
    an image file, plus ``porosity``, ``permeability`` and
    ``formation_factor`` columns holding the regression targets.

    Requires ``Image`` from Pillow (``from PIL import Image``) to be in scope.

    Args:
        dataframe: pandas DataFrame with the columns listed above. Rows are
            addressed by position, so the index labels do not matter.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the backing data frame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(image, (label1, label2, label3))`` where ``image`` is the
            grayscale image (transformed when a transform was given) and the
            labels are 0-dim float32 tensors for porosity, permeability and
            formation factor, in that order.
        """
        # Fetch the row once instead of repeating the positional lookup per column.
        row = self.dataframe.iloc[idx]

        # Open inside a context manager so the file handle is released promptly;
        # convert('L') returns an independent single-channel (grayscale) copy.
        with Image.open(row['file_path']) as source_image:
            image = source_image.convert('L')

        label1 = torch.tensor(row['porosity'], dtype=torch.float32)
        label2 = torch.tensor(row['permeability'], dtype=torch.float32)
        label3 = torch.tensor(row['formation_factor'], dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        return image, (label1, label2, label3)

# Image preprocessing applied to every sample (no random augmentation is used)
transform = transforms.Compose([
    transforms.Resize((224, 224)),               # fixed input resolution
    transforms.ToTensor(),                       # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=[0.5], std=[0.5])  # one channel (grayscale): rescale to [-1, 1]
])

# Build the dataset from the standardized targets. The raw formation_factor in
# filtered_data reaches ~1e16, so training on it would let that one task
# dominate the summed MSE loss; the losses recorded below are only consistent
# with standardized targets.
dataset = CustomDataset(standardized_data, transform=transform)

# 80/20 train/validation split, seeded so that a fresh run gives the same split
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Data loaders: reshuffle the training set each epoch, keep validation order fixed
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# Multi-task CNN: one shared convolutional trunk feeding three regression heads
class MultiTaskCNN(nn.Module):
    """Predict porosity, permeability and formation factor from one image.

    A single-channel image passes through two conv/ReLU/max-pool stages, is
    adaptively average-pooled to 7x7 and flattened. Three independent
    fully-connected heads then each emit one value per sample.
    """

    def __init__(self):
        super().__init__()
        # Shared feature extractor; the input has 1 channel (grayscale).
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Pool to a fixed 7x7 grid so the heads see 128 * 7 * 7 features.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier1 = self._regression_head()  # task 1: porosity
        self.classifier2 = self._regression_head()  # task 2: permeability
        self.classifier3 = self._regression_head()  # task 3: formation_factor

    @staticmethod
    def _regression_head():
        """Build one head: Linear -> ReLU -> Dropout -> Linear with a 1-dim output."""
        return nn.Sequential(
            nn.Linear(128 * 7 * 7, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, 1),
        )

    def forward(self, x):
        """Return ``(output1, output2, output3)``, each of shape ``(batch, 1)``."""
        shared = torch.flatten(self.avgpool(self.features(x)), 1)
        return self.classifier1(shared), self.classifier2(shared), self.classifier3(shared)

# Build the network and move its parameters to the selected device
# (nn.Module.to modifies the module in place and returns it).
model = MultiTaskCNN()
model.to(device)

# Mean squared error is the per-task loss
criterion = nn.MSELoss()

# Adam over all model parameters
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Training loop with a validation pass after every epoch
def train_model(model, criterion, optimizer, num_epochs=10):
    """Train ``model`` and print the training and validation loss per epoch.

    Batches are read from the notebook-level ``train_loader`` and
    ``val_loader``, and tensors are moved to the notebook-level ``device``.
    The optimized quantity is the unweighted sum of ``criterion`` over the
    three model outputs.

    Args:
        model: Network returning three outputs per batch.
        criterion: Loss applied to each (output, target) pair.
        optimizer: Optimizer stepping ``model``'s parameters.
        num_epochs: Number of passes over the training loader.

    Returns:
        None. Progress is reported with ``print``.
    """

    def summed_task_loss(batch):
        """Return ``(loss1 + loss2 + loss3, batch_size)`` for one batch."""
        images, labels = batch
        images = images.to(device)
        first, second, third = labels
        first, second, third = first.to(device), second.to(device), third.to(device)

        pred_first, pred_second, pred_third = model(images)

        # unsqueeze(1) turns the (batch,) targets into (batch, 1) to match the outputs.
        loss_first = criterion(pred_first, first.unsqueeze(1))
        loss_second = criterion(pred_second, second.unsqueeze(1))
        loss_third = criterion(pred_third, third.unsqueeze(1))
        return loss_first + loss_second + loss_third, images.size(0)

    for epoch in range(num_epochs):
        # Training pass
        model.train()
        running_loss = 0.0
        for batch in train_loader:
            optimizer.zero_grad()
            batch_loss, batch_size = summed_task_loss(batch)
            batch_loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figure is a per-sample mean.
            running_loss += batch_loss.item() * batch_size

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

        # Validation pass: eval mode and no gradient tracking
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in val_loader:
                batch_loss, batch_size = summed_task_loss(batch)
                val_loss += batch_loss.item() * batch_size

        val_loss /= len(val_loader.dataset)
        print(f"Validation Loss: {val_loss:.4f}")

# Start training for 20 epochs
train_model(model, criterion, optimizer, num_epochs=20)


Epoch [1/20], Loss: 3.4181
Validation Loss: 1.1202
Epoch [2/20], Loss: 1.5804
Validation Loss: 0.7739
Epoch [3/20], Loss: 1.5100
Validation Loss: 0.8218
Epoch [4/20], Loss: 1.5169
Validation Loss: 0.8000
Epoch [5/20], Loss: 1.4840
Validation Loss: 0.7590
Epoch [6/20], Loss: 1.4420
Validation Loss: 0.7614
Epoch [7/20], Loss: 1.4438
Validation Loss: 0.7532
Epoch [8/20], Loss: 1.3972
Validation Loss: 0.7760
Epoch [9/20], Loss: 1.4289
Validation Loss: 0.7748
Epoch [10/20], Loss: 1.4055
Validation Loss: 0.7769
Epoch [11/20], Loss: 1.4035
Validation Loss: 0.8009
Epoch [12/20], Loss: 1.3519
Validation Loss: 0.7562
Epoch [13/20], Loss: 1.3678
Validation Loss: 0.7638
Epoch [14/20], Loss: 1.3312
Validation Loss: 0.7740
Epoch [15/20], Loss: 1.2960
Validation Loss: 0.8247
Epoch [16/20], Loss: 1.3206
Validation Loss: 0.7585
Epoch [17/20], Loss: 1.3557
Validation Loss: 0.7741
Epoch [18/20], Loss: 1.2229
Validation Loss: 0.7689
Epoch [19/20], Loss: 1.2351
Validation Loss: 0.8432
Epoch [20/20], Loss: 

In [32]:
# Evaluation helper that reports the MSE of each task separately
def evaluate_model_on_val_set(model, val_loader, device):
    """Print the per-sample mean squared error of each task over ``val_loader``.

    Squared errors are summed over every sample and divided by the total
    sample count. Averaging per-batch means instead would give a smaller final
    batch the same weight as a full one and bias the result.

    Args:
        model: Network returning three ``(batch, 1)`` outputs.
        val_loader: Iterable yielding ``(inputs, (target1, target2, target3))``
            with each target of shape ``(batch,)``.
        device: Device that inputs and targets are moved to.

    Returns:
        None. One ``Average MSE for <task>`` line is printed per task.

    Raises:
        ZeroDivisionError: If ``val_loader`` yields no samples.
    """
    model.eval()  # evaluation mode (disables dropout)
    task_names = ('Porosity', 'Permeability', 'Formation Factor')
    squared_error_sums = {task: 0.0 for task in task_names}
    sample_count = 0

    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device)
            target1, target2, target3 = targets
            outputs1, outputs2, outputs3 = model(inputs)

            pairs = ((outputs1, target1), (outputs2, target2), (outputs3, target3))
            for task, (output, target) in zip(task_names, pairs):
                # unsqueeze(1): (batch,) -> (batch, 1) so the subtraction is
                # element-wise rather than broadcast to (batch, batch).
                residual = output - target.to(device).unsqueeze(1)
                squared_error_sums[task] += torch.sum(residual ** 2).item()

            sample_count += inputs.size(0)

    # Per-sample mean for each task
    for task, total in squared_error_sums.items():
        average_mse = total / sample_count
        print(f"Average MSE for {task}: {average_mse:.4f}")

# Assumes the validation data loader val_loader already exists,
# device has been set to the GPU or the CPU,
# and model is the already-trained model

# Evaluate model performance on the validation set
evaluate_model_on_val_set(model, val_loader, device)

Average MSE for Porosity: 0.0117
Average MSE for Permeability: 0.3427
Average MSE for Formation Factor: 0.3971


In [33]:
import numpy as np
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

# Evaluation helper that computes several regression metrics per task
def evaluate_model_multi_metrics(model, val_loader, device):
    """Compute MSE, RMSE, MAE and R^2 for each task over ``val_loader``.

    Targets and predictions are collected batch by batch, concatenated per
    task, and passed to the scikit-learn metric functions.

    Args:
        model: Network returning three outputs per batch.
        val_loader: Iterable yielding ``(inputs, (target1, target2, target3))``.
        device: Device that inputs and targets are moved to.

    Returns:
        Dict keyed by task name (``'Porosity'``, ``'Permeability'``,
        ``'Formation Factor'``); each value is a dict with the keys ``'MSE'``,
        ``'RMSE'``, ``'MAE'`` and ``'R^2'``.
    """
    model.eval()  # evaluation mode
    task_names = ('Porosity', 'Permeability', 'Formation Factor')
    true_values = {name: [] for name in task_names}
    predictions = {name: [] for name in task_names}

    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device)
            target1, target2, target3 = targets
            batch_truth = (target1.to(device), target2.to(device), target3.to(device))

            outputs1, outputs2, outputs3 = model(inputs)
            batch_pred = (outputs1, outputs2, outputs3)

            # Store ground truth and predictions as NumPy arrays on the CPU
            for name, truth, pred in zip(task_names, batch_truth, batch_pred):
                true_values[name].append(truth.cpu().numpy())
                predictions[name].append(pred.cpu().numpy())

    # Join the per-batch arrays and score each task
    results = {}
    for name in task_names:
        y_true = np.concatenate(true_values[name])
        y_pred = np.concatenate(predictions[name])

        mse = mean_squared_error(y_true, y_pred)
        results[name] = {
            'MSE': mse,
            'RMSE': np.sqrt(mse),
            'MAE': mean_absolute_error(y_true, y_pred),
            'R^2': r2_score(y_true, y_pred),
        }

    return results

# Evaluate model performance on the validation set
results = evaluate_model_multi_metrics(model, val_loader, device)

# Print one header line per task followed by its metrics
for task, metrics in results.items():
    report_lines = [f"Evaluation results for {task}:"]
    report_lines.extend(f"  {metric}: {value:.4f}" for metric, value in metrics.items())
    print("\n".join(report_lines))

Evaluation results for Porosity:
  MSE: 0.0117
  RMSE: 0.1081
  MAE: 0.0982
  R^2: 0.9878
Evaluation results for Permeability:
  MSE: 0.3427
  RMSE: 0.5854
  MAE: 0.3259
  R^2: 0.6939
Evaluation results for Formation Factor:
  MSE: 0.3971
  RMSE: 0.6301
  MAE: 0.0834
  R^2: -0.0090
