In [1]:
import jupyter_ai
%reload_ext jupyter_ai_magics
%reload_ext dotenv
%dotenv /root/key.env
%ai list

cannot find .env file


| Provider | Environment variable | Set? | Models |
|----------|----------------------|------|--------|
| `ai21` | `AI21_API_KEY` | <abbr title="You have not set this environment variable, so you cannot use this provider's models.">❌</abbr> | `ai21:j1-large`, `ai21:j1-grande`, `ai21:j1-jumbo`, `ai21:j1-grande-instruct`, `ai21:j2-large`, `ai21:j2-grande`, `ai21:j2-jumbo`, `ai21:j2-grande-instruct`, `ai21:j2-jumbo-instruct` |
| `bedrock` | Not applicable. | <abbr title="Not applicable">N/A</abbr> | `bedrock:amazon.titan-tg1-large`, `bedrock:anthropic.claude-v1`, `bedrock:anthropic.claude-instant-v1`, `bedrock:anthropic.claude-v2`, `bedrock:ai21.j2-jumbo-instruct`, `bedrock:ai21.j2-grande-instruct` |
| `anthropic` | `ANTHROPIC_API_KEY` | <abbr title="You have not set this environment variable, so you cannot use this provider's models.">❌</abbr> | `anthropic:claude-v1`, `anthropic:claude-v1.0`, `anthropic:claude-v1.2`, `anthropic:claude-2`, `anthropic:claude-instant-v1`, `anthropic:claude-instant-v1.0` |
| `azure-chat-openai` | `OPENAI_API_KEY` | <abbr title="You have set this environment variable, so you can use this provider's models.">✅</abbr> | This provider does not define a list of models. |
| `cohere` | `COHERE_API_KEY` | <abbr title="You have not set this environment variable, so you cannot use this provider's models.">❌</abbr> | `cohere:medium`, `cohere:xlarge` |
| `gpt4all` | Not applicable. | <abbr title="Not applicable">N/A</abbr> | `gpt4all:ggml-gpt4all-j-v1.2-jazzy`, `gpt4all:ggml-gpt4all-j-v1.3-groovy`, `gpt4all:ggml-gpt4all-l13b-snoozy` |
| `huggingface_hub` | `HUGGINGFACEHUB_API_TOKEN` | <abbr title="You have not set this environment variable, so you cannot use this provider's models.">❌</abbr> | See https://huggingface.co/models for a list of models. Pass a model's repository ID as the model ID; for example, `huggingface_hub:ExampleOwner/example-model`. |
| `openai` | `OPENAI_API_KEY` | <abbr title="You have set this environment variable, so you can use this provider's models.">✅</abbr> | `openai:text-davinci-003`, `openai:text-davinci-002`, `openai:text-curie-001`, `openai:text-babbage-001`, `openai:text-ada-001`, `openai:davinci`, `openai:curie`, `openai:babbage`, `openai:ada` |
| `openai-chat` | `OPENAI_API_KEY` | <abbr title="You have set this environment variable, so you can use this provider's models.">✅</abbr> | `openai-chat:gpt-3.5-turbo`, `openai-chat:gpt-3.5-turbo-16k`, `openai-chat:gpt-3.5-turbo-0301`, `openai-chat:gpt-3.5-turbo-0613`, `openai-chat:gpt-3.5-turbo-16k-0613`, `openai-chat:gpt-4`, `openai-chat:gpt-4-0314`, `openai-chat:gpt-4-0613`, `openai-chat:gpt-4-32k`, `openai-chat:gpt-4-32k-0314`, `openai-chat:gpt-4-32k-0613` |
| `openai-chat-new` | `OPENAI_API_KEY` | <abbr title="You have set this environment variable, so you can use this provider's models.">✅</abbr> | `openai-chat-new:gpt-3.5-turbo`, `openai-chat-new:gpt-3.5-turbo-16k`, `openai-chat-new:gpt-3.5-turbo-0301`, `openai-chat-new:gpt-3.5-turbo-0613`, `openai-chat-new:gpt-3.5-turbo-16k-0613`, `openai-chat-new:gpt-4`, `openai-chat-new:gpt-4-0314`, `openai-chat-new:gpt-4-0613`, `openai-chat-new:gpt-4-32k`, `openai-chat-new:gpt-4-32k-0314`, `openai-chat-new:gpt-4-32k-0613` |
| `sagemaker-endpoint` | Not applicable. | <abbr title="Not applicable">N/A</abbr> | Specify an endpoint name as the model ID. In addition, you must include the `--region_name`, `--request_schema`, and the `--response_path` arguments. For more information, see the documentation about [SageMaker endpoints deployment](https://docs.aws.amazon.com/sagemaker/latest/dg/realtime-endpoints-deployment.html) and about [using magic commands with SageMaker endpoints](https://jupyter-ai.readthedocs.io/en/latest/users/index.html#using-magic-commands-with-sagemaker-endpoints). |

Aliases and custom commands:

| Name | Target |
|------|--------|
| `gpt2` | `huggingface_hub:gpt2` |
| `gpt3` | `openai:text-davinci-003` |
| `chatgpt` | `openai-chat:gpt-3.5-turbo` |
| `gpt4` | `openai-chat:gpt-4` |
| `titan` | `bedrock:amazon.titan-tg1-large` |


In [3]:
%%ai openai-chat:gpt-3.5-turbo
请帮我写一个使用PII数据集训练GCN模型的代码

以下是使用PII数据集训练GCN模型的示例代码：

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# GCN model definition
class GCN(nn.Module):
    """Two-layer graph-convolution-style classifier.

    Each layer first multiplies the node features by the adjacency matrix and
    then applies a linear map; a ReLU sits between the two layers.

    Args:
        input_dim: Number of input features per node.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output scores per node.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, adj_matrix):
        """Return unnormalised class scores (logits) for every row of ``x``.

        Args:
            x: Feature tensor that ``adj_matrix`` can be matrix-multiplied with.
            adj_matrix: Adjacency matrix applied before each linear layer.

        Returns:
            Tensor of logits with ``output_dim`` entries in the last dimension.
            Apply ``torch.softmax(logits, dim=-1)`` to obtain probabilities.
        """
        hidden = torch.relu(self.fc1(torch.matmul(adj_matrix, x)))
        # Logits are returned on purpose: nn.CrossEntropyLoss applies
        # log-softmax internally, so a softmax here would be applied twice
        # and flatten the gradients during training.
        return self.fc2(torch.matmul(adj_matrix, hidden))

# PII dataset wrapper
class PIIDataset(Dataset):
    """Map-style dataset that pairs each sample with the label at the same index."""

    def __init__(self, data, labels):
        # Both containers are stored as given; no copy or conversion is made.
        self.data, self.labels = data, labels

    def __len__(self):
        # The size is taken from the sample container alone.
        return len(self.data)

    def __getitem__(self, index):
        # Return the (sample, label) pair stored at ``index``.
        return self.data[index], self.labels[index]

# Load the PII dataset from two NumPy files in the working directory
data = np.load('pii_data.npy')  # PII data
labels = np.load('pii_labels.npy')  # labels

# Encode the labels; the fitted encoder is kept because its `classes_`
# attribute is read later to size the model's output layer
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Split into training and test sets (20% held out, fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Build the data loaders: batches of 32, only the training loader is shuffled
train_dataset = PIIDataset(X_train, y_train)
test_dataset = PIIDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Training function
def train(model, criterion, optimizer, num_epochs, loader=None, adj=None):
    """Train ``model`` and print the mean batch loss after every epoch.

    Args:
        model: Module invoked as ``model(inputs, adj)``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer; ``zero_grad()`` and ``step()`` run once per batch.
        num_epochs: Number of passes over the batches.
        loader: Iterable yielding ``(inputs, labels)`` tensor batches. When
            omitted, the module-level ``train_loader`` is used.
        adj: Adjacency matrix handed to the model on every forward pass. When
            omitted, the module-level ``adj_matrix`` is used.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    # Resolve the data source and graph once; the fallbacks keep existing
    # four-argument calls working against the module-level names.
    batches = train_loader if loader is None else loader
    graph = adj_matrix if adj is None else adj

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in batches:
            inputs = inputs.float()
            labels = labels.long()

            optimizer.zero_grad()

            outputs = model(inputs, graph)  # forward pass through the GCN

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # Average over the batches actually consumed; max() avoids a
        # ZeroDivisionError when the loader yields nothing.
        mean_loss = running_loss / max(num_batches, 1)
        print(f"Epoch: {epoch+1}, Loss: {mean_loss}")

# Build the GCN model
input_dim = data.shape[1]  # input dimension
hidden_dim = 64  # hidden layer dimension
output_dim = len(label_encoder.classes_)  # output dimension, i.e. the number of label classes

model = GCN(input_dim, hidden_dim, output_dim)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Start training
num_epochs = 30

# NOTE(review): train() reads a module-level `adj_matrix` that this script
# never defines; it has to be built before this call or a NameError is raised.
train(model, criterion, optimizer, num_epochs)
```

请注意，上述代码并未定义`adj_matrix`：它应是一个标准化的邻接矩阵，需要在调用`train`之前根据具体的图结构自行构建，且其维度必须与每次前向传播所输入的节点数一致。请根据自己的数据集和图结构相应地修改代码。此外，还需根据实际数据集的特征维度和类别数调整模型的输入和输出维度。

In [5]:
import pandas as pd
# Read the sample table and preview its first rows as the cell's output.
data = pd.read_csv("./data/OA/samples.csv")
data.head()

Unnamed: 0,ID,commonXingb,commonNl,commonSg,commonTz,commonStFz,commonQx,commonQxYn,commonSm,commonSmRskn,...,commonDiseaseCode,commonTzPhz,commonTzQxz,commonTzYaxz,commonTzYixz,commonTzTsz,commonTzSrz,commonTzXyz,commonTzQyz,commonTzTbz
0,1,0,0.62963,0.890244,0.505882,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,2,0,0.864198,0.829268,0.270588,0,0,0,1,1,...,0,1,0,0,0,0,0,0,0,0
2,3,1,0.802469,0.878049,0.305882,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,4,0,0.617284,0.756098,0.270588,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,5,0,0.493827,0.682927,0.435294,0,0,0,0,0,...,1,0,0,1,0,0,0,0,0,0


In [9]:
# Column count, dtypes and memory footprint (info() prints its report directly).
data.info()
# Leave the summary as the cell's last expression so Jupyter renders it as a
# rich table instead of the line-wrapped plain text that print() produces.
data.describe(include='all')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1421 entries, 0 to 1420
Columns: 547 entries, ID to commonTzTbz
dtypes: float64(4), int64(543)
memory usage: 5.9 MB
                ID  commonXingb     commonNl     commonSg     commonTz  \
count  1421.000000  1421.000000  1421.000000  1421.000000  1421.000000   
mean    711.000000     0.303308     0.601258     0.753596     0.324604   
std     410.351678     0.459849     0.145169     0.108565     0.120739   
min       1.000000     0.000000     0.000000     0.000000     0.000000   
25%     356.000000     0.000000     0.506173     0.682927     0.247059   
50%     711.000000     0.000000     0.601258     0.743902     0.317647   
75%    1066.000000     1.000000     0.691358     0.829268     0.388235   
max    1421.000000     1.000000     1.000000     1.000000     1.000000   

        commonStFz     commonQx   commonQxYn     commonSm  commonSmRskn  ...  \
count  1421.000000  1421.000000  1421.000000  1421.000000   1421.000000  ...   
mean   

In [11]:
# Read the training-label table and preview its first rows as the cell's output.
# NOTE(review): this rebinds `data`, which an earlier cell bound to samples.csv.
data = pd.read_csv("./data/OA/train_label.csv")
data.head()

Unnamed: 0,commonDiseaseCode,commonTzPhz,commonTzQxz,commonTzYaxz,commonTzYixz,commonTzTsz,commonTzSrz,commonTzXyz,commonTzQyz,commonTzTbz
0,0,0,1,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,1,0,0,0
4,1,0,0,1,0,0,0,0,0,0
