In [1]:
!pip install datasets #open3d laspy



In [2]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
from datasets import load_dataset

import torch 
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR # or another scheduler
from torch.utils.data import Dataset, DataLoader
import wandb

# from pyproj import Transformer, CRS
# from mpl_toolkits.mplot3d import Axes3D
# import gc
# import laspy
# import open3d as o3d

import warnings
warnings.simplefilter(action='ignore')

from tqdm.auto import tqdm
tqdm.pandas()

In [3]:
# Read the box annotations, drop exact duplicate rows and renumber the index from 0.
train_df = pd.read_csv('/kaggle/input/power-line-security-zone-vegetation-detection/train.csv').drop_duplicates().reset_index(drop=True)
print('shape: ', train_df.shape)
train_df.head()

shape:  (4630, 9)


Unnamed: 0,file_name,center_x,center_y,center_z,size_x,size_y,size_z,yaw,class
0,Z3_cloud0.las,-663.347102,-1460.2743,18.188135,3.33763,7.925643,27.058957,-1.078151,LEP_metal
1,Z3_cloud0.las,-634.74975,-1405.2253,4.10385,4.792479,6.259897,2.6243,-1.272828,vegetation
2,Z3_cloud0.las,-607.36955,-1409.58785,3.3194,4.708849,3.992601,2.2916,-2.455875,vegetation
3,Z3_cloud0.las,-649.87155,-1396.08245,5.34305,3.506985,3.80166,3.6357,-2.056539,vegetation
4,Z3_cloud0.las,-632.8091,-1398.19915,5.8762,2.742089,2.52011,4.0556,-1.515782,vegetation


In [4]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the textual class column, then overwrite that column
# with the corresponding integer ids.
le = LabelEncoder().fit(train_df['class'])
train_df['class'] = le.transform(train_df['class'])
# Integer id -> original class name.
classes = dict(enumerate(le.classes_))

In [None]:
def group2box(row):
    """Pack one annotation row into a flat 8-element numpy array.

    The element order is: center_x, center_y, center_z, size_x, size_y,
    size_z, yaw, class. `row` only needs to support lookup by those keys.
    """
    fields = ('center_x', 'center_y', 'center_z',
              'size_x', 'size_y', 'size_z',
              'yaw', 'class')
    return np.array([row[name] for name in fields])

train_df['answer'] = train_df.apply(group2box, axis=1)
train_df = train_df[['file_name', 'answer']]

In [None]:
# Collapse the per-box rows into one row per file: each remaining column is
# aggregated into a numpy array holding that file's values.
new_df = train_df.groupby('file_name').agg(lambda x: np.array(x)).reset_index()
# Number of aggregated entries (one per box) for every file.
new_df['counts'] = new_df.answer.map(len)

def reshape_arrays(row):
    """Turn a file's sequence of 8-element box vectors into a (counts, 8) array.

    `row['answer']` is concatenated into one flat array and then reshaped to
    `row['counts']` rows of 8 values each.
    """
    flat = np.concatenate(row['answer'])
    n_boxes = row['counts']
    return flat.reshape(n_boxes, 8)
    
new_df['answer'] = new_df.apply(reshape_arrays, axis=1)

In [6]:
# Point-cloud rows for all files, fetched from the Hugging Face hub.
ds = load_dataset("Eka-Korn/power_line_lidar_data")

# After the transpose + reset_index each row describes one file: the former column
# header ends up in 'index', and columns 0 and 1 are read by CustomDataset as the
# start/end row offsets of that file inside the dataset split.
train_counts = pd.read_csv('/kaggle/input/power-line-counts/train_counts.csv').T.reset_index()
# Drop the last 4 characters of the name (presumably a '.csv' suffix - TODO confirm).
train_counts['index'] = train_counts['index'].map(lambda x: x[:-4])

# Same layout and clean-up for the held-out split.
test_counts = pd.read_csv('/kaggle/input/power-line-counts/test_counts.csv').T.reset_index()
test_counts['index'] = test_counts['index'].map(lambda x: x[:-4])

README.md:   0%|          | 0.00/452 [00:00<?, ?B/s]

train-00000-of-00005.parquet:   0%|          | 0.00/482M [00:00<?, ?B/s]

train-00001-of-00005.parquet:   0%|          | 0.00/482M [00:00<?, ?B/s]

train-00002-of-00005.parquet:   0%|          | 0.00/482M [00:00<?, ?B/s]

train-00003-of-00005.parquet:   0%|          | 0.00/481M [00:00<?, ?B/s]

train-00004-of-00005.parquet:   0%|          | 0.00/482M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/346M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/83980195 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/12056725 [00:00<?, ? examples/s]

In [9]:
# DataLoader settings. The loss code assumes batch_size == 1 (it reads boxes[0] only).
workers = 4
batch_size = 1

# Train on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
lr = 3e-4
# NOTE(review): `warmup` and `warmup_steps` are not referenced anywhere in the visible
# notebook - confirm whether a warm-up schedule was intended.
warmup = 20
# weight_decay = 1e-5
epochs = 100
warmup_steps = 10

In [25]:
class CustomDataset(Dataset):
    """Map-style dataset yielding one point cloud (and optionally its boxes) per file.

    Args:
        sub_ds: sliceable dataset split; `sub_ds[start:end]` must return something
            `pd.DataFrame` can consume and that contains 'x', 'y' and 'z' columns.
        counts_df: one row per file with the file name in column 'index' and the
            start/end row offsets of that file in columns 0 and 1.
        new_df: optional frame with 'file_name' and 'answer' columns holding the
            ground-truth boxes per file. When omitted (inference), items contain
            the points only.
    """

    def __init__(self, sub_ds, counts_df, new_df=None):
        self.ds = sub_ds
        self.counts_df = counts_df
        self.new_df = new_df

    def __getitem__(self, i):
        """Return `points` or `(points, boxes)` for the i-th file.

        `points` is a float tensor with 3 rows (x, y, z), its columns ordered by
        ascending x.

        Raises:
            KeyError: labels were supplied but contain no entry for this file.
        """
        row = self.counts_df.iloc[i]
        # Columns are labelled 0 and 1, so this is a label lookup, not a positional one.
        start, end = int(row[0]), int(row[1])

        points = pd.DataFrame(self.ds[start:end])[['x', 'y', 'z']]
        points = points.sort_values(by='x').reset_index(drop=True).T
        points = torch.tensor(points.values, dtype=torch.float)

        if self.new_df is None:
            return points

        matches = self.new_df.loc[self.new_df['file_name'] == row['index'], 'answer']
        if matches.empty:
            # Fail with the offending file name instead of a bare IndexError.
            raise KeyError(f"no ground-truth boxes found for file {row['index']!r}")
        return points, matches.values[0]

    def __len__(self):
        """Number of files described by `counts_df`."""
        return len(self.counts_df)

    # batch_size = 1
    # def collate_fn(self, batch):

In [10]:
train_dataset = CustomDataset(ds['train'], train_counts, new_df)
test_dataset = CustomDataset(ds['test'], test_counts, new_df)

In [11]:
# One cloud per batch: the default collate is used because batch_size is 1.
# NOTE(review): the training loader does not pass shuffle=True, so files are visited
# in the same order every epoch - confirm this is intended.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=workers, pin_memory=True,)
                                           #collate_fn=train_dataset.collate_fn) 
# Held-out loader with the same settings.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, num_workers=workers, pin_memory=True,)
                                          #collate_fn=train_dataset.collate_fn, ) 

In [11]:
class Detector(nn.Module):
    """1-D convolutional detector that emits one candidate box per cloud chunk.

    The point sequence is cut into equal-length chunks along the last axis; every
    chunk goes through three Conv1d + ReLU + MaxPool stages, a global max over the
    remaining length, and a small MLP head.

    Args:
        input_dim: number of input channels per point (3 for x, y, z).
        num_coords: number of regressed box values (cx, cy, cz, dx, dy, dz, yaw -> 7).
        num_classes: number of class logits appended after the box values.
        alpha: over-segmentation factor; the chunk length is
            `num_points // (b * alpha)`, so roughly `alpha * b` candidates are
            produced. Should be >= 1 so there are at least `b` candidates.
    """

    def __init__(self, input_dim, num_coords, num_classes, alpha):
        super(Detector, self).__init__()
        self.alpha = alpha
        # Layer sizes follow the constructor arguments instead of hard-coded 3 / 10.
        self.conv1 = nn.Conv1d(input_dim, 32, 11)
        self.conv2 = nn.Conv1d(32, 128, 11)
        self.conv3 = nn.Conv1d(128, 256, 11)

        self.pool = nn.MaxPool1d(2)
        self.lin1 = nn.Linear(256, 128)
        self.lin2 = nn.Linear(128, 64)
        # Output row layout: [box values (num_coords)] + [class logits (num_classes)].
        self.lin3 = nn.Linear(64, num_coords + num_classes)

        self.relu = nn.ReLU()

    def forward(self, xyz, b):
        """Predict candidate boxes for one cloud.

        Args:
            xyz: tensor of shape (batch, input_dim, num_points). Chunks are stacked
                along dim 0, so the notebook uses batch == 1.
            b: expected number of boxes in the cloud (must be positive).

        Returns:
            Tensor of shape (batch * num_chunks, num_coords + num_classes).

        Raises:
            ValueError: if `b` is not positive or the cloud has too few points to
                form a single chunk.
        """
        if b <= 0:
            raise ValueError(f"b must be positive, got {b}")
        emb_dim = int(xyz.shape[2] // (b * self.alpha))
        if emb_dim < 1:
            raise ValueError(
                f"cloud with {xyz.shape[2]} points is too small for b={b}, alpha={self.alpha}"
            )

        chunks = list(xyz.split(emb_dim, dim=2))
        # Only the trailing chunk can be shorter than the others; drop it so that
        # all chunks can be stacked into one batch.
        if chunks[-1].shape[-1] != emb_dim:
            chunks.pop(-1)
        x = torch.cat(chunks, dim=0)

        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))

        # Global max over the remaining length -> one 256-d descriptor per chunk.
        x = torch.max(x, 2)[0]
        x = self.relu(self.lin1(x))
        x = self.relu(self.lin2(x))
        x = self.lin3(x)
        return x

In [12]:
class CustomLoss(nn.Module):
    """Detection loss with greedy one-to-one matching of predictions to boxes.

    Ground-truth boxes are processed in order; each one takes the still-unused
    prediction whose box values have the lowest SmoothL1 distance to it. The
    matched predictions are then scored with SmoothL1 (box values) and
    cross-entropy (class logits).

    Args:
        num_classes: number of class logits at the end of every prediction row.
    """

    def __init__(self, num_classes=3):
        super(CustomLoss, self).__init__()
        self.num_classes = num_classes
        self.criterion_reg = nn.SmoothL1Loss()
        self.criterion_class = nn.CrossEntropyLoss()

    @staticmethod
    def _greedy_match(pred_xyz, target_xyz):
        """Return, for every target row, the index of the prediction assigned to it."""
        # The assignment is a discrete choice, so it is computed without autograd.
        with torch.no_grad():
            p, t = torch.broadcast_tensors(pred_xyz.unsqueeze(1), target_xyz.unsqueeze(0))
            # cost[i, j] = mean SmoothL1 between prediction i and target j.
            cost = nn.functional.smooth_l1_loss(p, t, reduction='none').mean(dim=2)
            # A NaN cost must never win the argmin.
            cost = cost.masked_fill(torch.isnan(cost), float('inf')).cpu()

            remaining = list(range(cost.shape[0]))
            matched = []
            for j in range(cost.shape[1]):
                # argmin returns the first minimum, i.e. the lowest-index candidate on ties.
                best = int(torch.argmin(cost[remaining, j]))
                matched.append(remaining.pop(best))
        return matched

    def forward(self, boxes, preds):
        """Compute the regression and classification losses.

        Args:
            boxes: ground truth of shape (1, num_boxes, num_coords + 1); the last
                column is the integer class id. Only batch element 0 is used.
            preds: predictions of shape (num_preds, num_coords + num_classes).

        Returns:
            Tuple `(loss_reg, loss_class)` of scalar tensors.

        Raises:
            ValueError: if there are fewer predictions than ground-truth boxes.
        """
        # as batch_size = 1
        boxes = torch.as_tensor(boxes[0], device=preds.device)
        if boxes.shape[0] > preds.shape[0]:
            raise ValueError(
                f"{boxes.shape[0]} boxes cannot be matched one-to-one with "
                f"{preds.shape[0]} predictions"
            )

        target_class = boxes[:, -1].to(torch.long)
        target_xyz = boxes[:, :-1].to(torch.float)

        matched = self._greedy_match(preds[:, :-self.num_classes], target_xyz)
        matched = torch.tensor(matched, dtype=torch.long, device=preds.device)
        # Indexing keeps the autograd graph, so gradients reach the matched rows only.
        right_preds = preds[matched]

        pred_classes = right_preds[:, -self.num_classes:]
        pred_coordinates = right_preds[:, :-self.num_classes]

        loss_reg = self.criterion_reg(pred_coordinates, target_xyz)
        loss_class = self.criterion_class(pred_classes, target_class)
        return loss_reg, loss_class

In [14]:
# Shape constants handed to the Detector constructor.
input_dim = 3 #xyz
num_coords = 7 # cx, cy, cz, dx, dy, dz, yaw
num_classes = 3 
# Detector.forward uses chunk length = num_points // (b * alpha), so a larger alpha
# yields more (shorter) chunks and therefore more candidate boxes per cloud.
alpha = 1.5 # must be >= 1
# The scheduler is stepped once per training batch, hence batches-per-epoch * epochs.
total_steps = len(train_loader) * epochs

In [15]:
# Build the detector on the selected device, an Adam optimizer over its parameters,
# and the matching-based detection loss.
model = Detector(input_dim, num_coords, num_classes, alpha).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)#, weight_decay=weight_decay)
criterion = CustomLoss().to(device)

In [17]:
# Cosine-anneal the learning rate over `total_steps` scheduler steps with a floor of 0.
scheduler = CosineAnnealingLR(optimizer, T_max=total_steps, eta_min=0)

In [20]:
wandb_project = "power_line_hack"
wandb_run_name = "custom_detection2"
wandb.init(project=wandb_project, name=wandb_run_name)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112878788890158, max=1.0…

In [21]:
# Train for `epochs` epochs; after each one, evaluate on the held-out loader and log
# the losses of the LAST batch of each phase to Weights & Biases.
for epoch in range(epochs):
    print(f'Epoch: {epoch+1}')
    model.train()
    # [regression, classification] of the most recent batch; NaN until a batch is seen
    # so the logging below cannot fail on an empty loader.
    train_loss = [float('nan'), float('nan')]
    for i, (xyz, boxes) in enumerate(train_loader):
        # boxes has shape (1, num_boxes, 8): the box count tells the model how finely
        # to chunk the cloud.
        preds = model(xyz.to(device), boxes.shape[1])
        loss_reg, loss_class = criterion(boxes.to(device), preds)
        train_loss = [loss_reg.item(), loss_class.item()]
        loss = loss_reg + loss_class
        print(f'Train Loss. Regression: {train_loss[0]}; Classification: {train_loss[1]}')
        
        optimizer.zero_grad()
        # Backpropagate the combined loss. Calling backward on loss_reg alone leaves
        # the class logits without any gradient, so the classifier never learns.
        # NOTE(review): the regression term is orders of magnitude larger than the
        # classification term in the recorded output; a weighting factor may be needed.
        loss.backward()
        optimizer.step()
        scheduler.step()
    
    model.eval()
    val_loss = [float('nan'), float('nan')]
    with torch.no_grad():
        for i, (xyz, boxes) in enumerate(test_loader):
            preds = model(xyz.to(device), boxes.shape[1])
            loss_reg, loss_class = criterion(boxes.to(device), preds)
            val_loss = [loss_reg.item(), loss_class.item()]
            print(f'Validation Loss. Regression: {val_loss[0]}; Classification: {val_loss[1]}')

    wandb.log({
            "epoch": epoch,
            "train/regression/last": train_loss[0],
            "train/classification/last": train_loss[1],
            "val/regression/last": val_loss[0],
            "val/classification/last": val_loss[1],
            "lr": optimizer.param_groups[0]["lr"]
        })

Epoch: 1
Train Loss. Regression: 279.783203125; Classification: 1.088423728942871
Train Loss. Regression: 343.342529296875; Classification: 1.0870205163955688
Train Loss. Regression: 654.8096923828125; Classification: 1.0734506845474243
Train Loss. Regression: 226.80911254882812; Classification: 0.9902727603912354
Train Loss. Regression: 228.51776123046875; Classification: 1.0876500606536865
Train Loss. Regression: 268.50762939453125; Classification: 1.0489633083343506
Train Loss. Regression: 284.477294921875; Classification: 1.198250412940979
Train Loss. Regression: 52.582088470458984; Classification: 1.0131810903549194
Train Loss. Regression: 301.4443664550781; Classification: 1.0657778978347778
Train Loss. Regression: 371.7803039550781; Classification: 1.0507498979568481
Train Loss. Regression: 250.6813507080078; Classification: 0.9941596984863281
Train Loss. Regression: 91.6534423828125; Classification: 1.030625820159912
Train Loss. Regression: 156.3503875732422; Classification: 0.

KeyboardInterrupt: 

In [28]:
# torch.save(model.state_dict(), 'first_model_dict.pth')

## Make predictions

In [26]:
# path = '/kaggle/input/power-line-security-zone-vegetation-detection/test/test'
# las_files = os.listdir(path)
# for las_file in las_files:
#     print(las_file)
#     las_filepath = os.path.join(path, las_file)
#     las = laspy.read(las_filepath)
#     points = las.xyz - las.header.offset
#     pcd = o3d.geometry.PointCloud()
#     pcd.points = o3d.utility.Vector3dVector(points)
#     downpcd = pcd.voxel_down_sample(voxel_size=0.4)
#     df = pd.DataFrame(np.asarray(downpcd.points), columns=['x', 'y', 'z'])
#     df['file_name'] = las_file
#     df.to_csv(f'{las_file}.csv', index=False)
        
#     del downpcd
#     del points
#     del df
#     gc.collect()

Day2_5_cloud1.las
Z13_cloud0.las
MP6_cloud0.las
MP2_cloud0.las
Day2_4_cloud2.las
Day2_8_cloud1.las
Day2_1_cloud2.las
Day3_cloud4.las


In [27]:
# output_dir = '/kaggle/working'
# files = [os.path.join(output_dir, x) for x in os.listdir(output_dir)]
# files.remove('/kaggle/working/.virtual_documents')
# files.remove('/kaggle/working/wandb')

In [30]:
# def make_count(files):
#     count = 0
#     counts = {}
#     for file in files:
#         df = pd.read_csv(file)
#         n = len(df)
#         counts[file.split('/')[-1]] = [count, count+n]
#         count += n
#     pd.DataFrame(counts).to_csv(f'counts.csv', index=False)

# make_count(files)

In [None]:
# dataset = load_dataset("csv", data_files={"test": files})
# dataset.push_to_hub('power_line_lidar_data_test', token=)

Generating test split: 0 examples [00:00, ? examples/s]

In [18]:
ds = load_dataset("Eka-Korn/power_line_lidar_data_test")

README.md:   0%|          | 0.00/381 [00:00<?, ?B/s]

test-00000-of-00003.parquet:   0%|          | 0.00/254M [00:00<?, ?B/s]

test-00001-of-00003.parquet:   0%|          | 0.00/254M [00:00<?, ?B/s]

test-00002-of-00003.parquet:   0%|          | 0.00/254M [00:00<?, ?B/s]

Generating test split:   0%|          | 0/26246944 [00:00<?, ? examples/s]

In [19]:
class CustomDataset(Dataset):
    """Label-free dataset used for inference: one point cloud per file.

    `counts_df` holds one row per file whose columns 0 and 1 are the start and end
    row offsets of that file inside `sub_ds`.
    """

    def __init__(self, sub_ds, counts_df):
        self.ds, self.counts_df = sub_ds, counts_df

    def __len__(self):
        """Number of files described by `counts_df`."""
        return len(self.counts_df)

    def __getitem__(self, i):
        """Return the i-th cloud as a float tensor with rows x, y, z, ordered by x."""
        bounds = self.counts_df.iloc[i]
        first = bounds[0]
        last = bounds[1]
        frame = pd.DataFrame(self.ds[first:last])
        ordered = frame[['x', 'y', 'z']].sort_values(by='x').reset_index(drop=True)
        return torch.tensor(ordered.T.values, dtype=torch.float)

    # batch_size = 1
    # def collate_fn(self, batch):

In [20]:
counts = pd.read_csv('/kaggle/input/counts/counts.csv').T.reset_index()
counts['index'] = counts['index'].map(lambda x: x[:-4])

In [21]:
test_dataset = CustomDataset(ds['test'], counts)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, num_workers=4, pin_memory=True,)

In [45]:
# Box count handed to model(xyz, b) at inference. During training this argument is the
# true number of boxes in the cloud; no such count exists for the test files, so 100
# is an unvalidated guess (TODO: tune or estimate per file).
b = 100

In [27]:
# The file holds a whole pickled model object (the result is used directly as `model`),
# so the Detector class presumably has to be defined before this cell runs.
# NOTE(review): torch.load unpickles its input and can execute arbitrary code - only
# load checkpoints from a trusted source; newer PyTorch versions may also need
# weights_only=False for a full-model pickle (confirm against the installed version).
model = torch.load('/kaggle/input/27nov/pytorch/default/1/first_model.pth').to(device)

In [46]:
sft = nn.Softmax(dim=1)

In [54]:
# Run the detector over every test cloud and keep only the candidates whose top class
# probability exceeds 0.75.
model.eval()
predicts = []
with torch.no_grad():
    for i, xyz in enumerate(test_loader):
        preds = model(xyz.to(device), b)  # one row per chunk: box values, then 3 class logits
        out_classes = sft(preds[:, -3:]).cpu().numpy()  # per-row class probabilities

        # (row index, probability vector, top probability) for every candidate row.
        scored = [(j, probs, max(probs)) for j, probs in enumerate(out_classes)]
        confident = [entry for entry in scored if entry[2] > 0.75]

        predicts.append({
            'classes': [classes[probs.argmax()] for _, probs, _ in confident],
            'coordinates': [preds[j, :-3] for j, _, _ in confident],
            'maximums': [top for _, _, top in confident],
            'ind': i,
        })

In [73]:
for pred in predicts:
    print(len(pred['classes']))
    # print(pred['classes'])

141
140
137
123
39
146
147
150


In [56]:
submission = pd.read_csv('/kaggle/input/power-line-security-zone-vegetation-detection/sample_submission.csv')
submission.columns

Index(['id', 'file_name', 'center_x', 'center_y', 'center_z', 'size_x',
       'size_y', 'size_z', 'yaw', 'class', 'score'],
      dtype='object')

In [65]:
result = pd.DataFrame()
all_count = 0

In [66]:
# Turn the kept detections into submission records. Every detection becomes one
# COLUMN of `result` (keyed by a running id); a later cell transposes the frame.
new_columns = {}
for i, pred in enumerate(predicts):
    # Local names are deliberately different from the global `classes` id->name
    # mapping: rebinding it here would break a re-run of the inference cell.
    pred_labels, pred_boxes, pred_scores = pred['classes'], pred['coordinates'], pred['maximums']
    file_name = counts.iloc[i]['index']
    for j in range(len(pred_labels)):
        coord = pred_boxes[j].cpu().numpy()
        new_columns[all_count] = {'file_name': file_name,
                                  'center_x': coord[0],
                                  'center_y': coord[1],
                                  'center_z': coord[2],
                                  'size_x': coord[3],
                                  'size_y': coord[4],
                                  'size_z': coord[5],
                                  'yaw': coord[6],
                                  'class': pred_labels[j],
                                  'score': pred_scores[j]}
        all_count += 1

# Build all columns in one go instead of inserting them into the frame one by one.
if new_columns:
    detections = pd.DataFrame(new_columns)
    result = detections if result.empty else pd.concat([result, detections], axis=1)

In [67]:
result = result.T
result

Unnamed: 0,file_name,center_x,center_y,center_z,size_x,size_y,size_z,yaw,class,score
0,Day2_1_cloud2.las,-2460.726562,2310.393311,-34.62962,17.690531,36.795872,61.232281,-17.588745,vegetation,1.0
1,Day2_1_cloud2.las,-2412.824219,2331.947021,-34.188187,18.539021,35.901814,61.799576,-17.817085,vegetation,1.0
2,Day2_1_cloud2.las,-2395.852051,2340.641113,-34.428726,18.528986,35.705551,62.167385,-17.570004,vegetation,1.0
3,Day2_1_cloud2.las,-2372.934814,2340.275879,-34.022388,18.666121,34.965778,61.422112,-16.994843,vegetation,1.0
4,Day2_1_cloud2.las,-2339.98877,2262.800293,-33.302311,18.102875,34.988579,60.540722,-17.627104,vegetation,1.0
...,...,...,...,...,...,...,...,...,...,...
1018,Day2_8_cloud1.las,2303.327393,91.570671,-42.063053,4.450402,12.056117,-15.297277,50.462536,vegetation,1.0
1019,Day2_8_cloud1.las,2304.204346,96.39756,-41.720791,4.562212,12.493543,-15.038206,50.47155,vegetation,1.0
1020,Day2_8_cloud1.las,2304.080811,103.14077,-41.412327,4.856684,12.933289,-14.816706,50.532631,vegetation,1.0
1021,Day2_8_cloud1.las,2306.985596,101.937744,-41.385998,4.677647,12.890077,-14.833774,50.580597,vegetation,1.0


In [68]:
result = result.reset_index()
result

Unnamed: 0,index,file_name,center_x,center_y,center_z,size_x,size_y,size_z,yaw,class,score
0,0,Day2_1_cloud2.las,-2460.726562,2310.393311,-34.62962,17.690531,36.795872,61.232281,-17.588745,vegetation,1.0
1,1,Day2_1_cloud2.las,-2412.824219,2331.947021,-34.188187,18.539021,35.901814,61.799576,-17.817085,vegetation,1.0
2,2,Day2_1_cloud2.las,-2395.852051,2340.641113,-34.428726,18.528986,35.705551,62.167385,-17.570004,vegetation,1.0
3,3,Day2_1_cloud2.las,-2372.934814,2340.275879,-34.022388,18.666121,34.965778,61.422112,-16.994843,vegetation,1.0
4,4,Day2_1_cloud2.las,-2339.98877,2262.800293,-33.302311,18.102875,34.988579,60.540722,-17.627104,vegetation,1.0
...,...,...,...,...,...,...,...,...,...,...,...
1018,1018,Day2_8_cloud1.las,2303.327393,91.570671,-42.063053,4.450402,12.056117,-15.297277,50.462536,vegetation,1.0
1019,1019,Day2_8_cloud1.las,2304.204346,96.39756,-41.720791,4.562212,12.493543,-15.038206,50.47155,vegetation,1.0
1020,1020,Day2_8_cloud1.las,2304.080811,103.14077,-41.412327,4.856684,12.933289,-14.816706,50.532631,vegetation,1.0
1021,1021,Day2_8_cloud1.las,2306.985596,101.937744,-41.385998,4.677647,12.890077,-14.833774,50.580597,vegetation,1.0


In [69]:
result = result.rename(columns={'index': 'id'})

In [70]:
result

Unnamed: 0,id,file_name,center_x,center_y,center_z,size_x,size_y,size_z,yaw,class,score
0,0,Day2_1_cloud2.las,-2460.726562,2310.393311,-34.62962,17.690531,36.795872,61.232281,-17.588745,vegetation,1.0
1,1,Day2_1_cloud2.las,-2412.824219,2331.947021,-34.188187,18.539021,35.901814,61.799576,-17.817085,vegetation,1.0
2,2,Day2_1_cloud2.las,-2395.852051,2340.641113,-34.428726,18.528986,35.705551,62.167385,-17.570004,vegetation,1.0
3,3,Day2_1_cloud2.las,-2372.934814,2340.275879,-34.022388,18.666121,34.965778,61.422112,-16.994843,vegetation,1.0
4,4,Day2_1_cloud2.las,-2339.98877,2262.800293,-33.302311,18.102875,34.988579,60.540722,-17.627104,vegetation,1.0
...,...,...,...,...,...,...,...,...,...,...,...
1018,1018,Day2_8_cloud1.las,2303.327393,91.570671,-42.063053,4.450402,12.056117,-15.297277,50.462536,vegetation,1.0
1019,1019,Day2_8_cloud1.las,2304.204346,96.39756,-41.720791,4.562212,12.493543,-15.038206,50.47155,vegetation,1.0
1020,1020,Day2_8_cloud1.las,2304.080811,103.14077,-41.412327,4.856684,12.933289,-14.816706,50.532631,vegetation,1.0
1021,1021,Day2_8_cloud1.las,2306.985596,101.937744,-41.385998,4.677647,12.890077,-14.833774,50.580597,vegetation,1.0


In [71]:
result['id'] += 1
result

Unnamed: 0,id,file_name,center_x,center_y,center_z,size_x,size_y,size_z,yaw,class,score
0,1,Day2_1_cloud2.las,-2460.726562,2310.393311,-34.62962,17.690531,36.795872,61.232281,-17.588745,vegetation,1.0
1,2,Day2_1_cloud2.las,-2412.824219,2331.947021,-34.188187,18.539021,35.901814,61.799576,-17.817085,vegetation,1.0
2,3,Day2_1_cloud2.las,-2395.852051,2340.641113,-34.428726,18.528986,35.705551,62.167385,-17.570004,vegetation,1.0
3,4,Day2_1_cloud2.las,-2372.934814,2340.275879,-34.022388,18.666121,34.965778,61.422112,-16.994843,vegetation,1.0
4,5,Day2_1_cloud2.las,-2339.98877,2262.800293,-33.302311,18.102875,34.988579,60.540722,-17.627104,vegetation,1.0
...,...,...,...,...,...,...,...,...,...,...,...
1018,1019,Day2_8_cloud1.las,2303.327393,91.570671,-42.063053,4.450402,12.056117,-15.297277,50.462536,vegetation,1.0
1019,1020,Day2_8_cloud1.las,2304.204346,96.39756,-41.720791,4.562212,12.493543,-15.038206,50.47155,vegetation,1.0
1020,1021,Day2_8_cloud1.las,2304.080811,103.14077,-41.412327,4.856684,12.933289,-14.816706,50.532631,vegetation,1.0
1021,1022,Day2_8_cloud1.las,2306.985596,101.937744,-41.385998,4.677647,12.890077,-14.833774,50.580597,vegetation,1.0


In [72]:
# Write the submission without the DataFrame index.
# NOTE(review): the file name is spelled 'submision.csv' (single "s") - confirm this is
# the name expected by the submission system.
result.to_csv('submision.csv', index=False)

In [31]:
# path = '/kaggle/input/power-line-security-zone-vegetation-detection/train/train'
# las_files = os.listdir(path)
# for las_file in las_files:
#     print(las_file)
#     las_filepath = os.path.join(path, las_file)
#     las = laspy.read(las_filepath)
#     points = las.xyz - las.header.offset
#     pcd = o3d.geometry.PointCloud()
#     pcd.points = o3d.utility.Vector3dVector(points)
#     downpcd = pcd.voxel_down_sample(voxel_size=0.4)
#     df = pd.DataFrame(np.asarray(downpcd.points), columns=['x', 'y', 'z'])
#     df['file_name'] = las_file
#     df.to_csv(f'{las_file}.csv', index=False)
        
#     del downpcd
#     del points
#     del df
#     gc.collect()

Day2_1_cloud0.las
Z11_cloud0.las
MP4_cloud1.las
NK3o1_cloud1.las
Day3_cloud6.las
NK3o1_cloud2.las
MP3_cloud0.las
Z8_cloud0.las
NK3o1_cloud0.las
NK0_cloud0.las
Day3_cloud0.las
Z1_cloud0.las
Day2_4_cloud0.las
Z7_cloud0.las
Z4_cloud1.las
Day2_6_cloud3.las
MP5_cloud0.las
Day3_cloud1.las
Day2_8_cloud0.las
NK2o1_cloud2.las
Day2_8_cloud2.las
Z9_cloud0.las
Z2_cloud0.las
Day2_7_cloud1.las
Day2_5_cloud0.las
Day2_2_cloud1.las
NK2o1_cloud0.las
Day3_cloud2.las
Z3_cloud0.las
Z12_cloud0.las
Day2_6_cloud2.las
Day2_6_cloud1.las
MP4_cloud0.las
Day3_cloud3.las
NK2o1_cloud1.las
Day2_4_cloud1.las
Z6_cloud0.las


In [179]:
# output_dir = '/kaggle/working'
# files = [os.path.join(output_dir, x) for x in os.listdir(output_dir)]
# files.remove('/kaggle/working/.virtual_documents')
# n = len(files)
# test_size = 0.1
# test_part = max(1, int(test_size * n))
# test_files, train_files = files[:test_part], files[test_part:]

In [11]:
# def make_count(sub, files)
#     count = 0
#     counts = {}
#     for file in files:
#         df = pd.read_csv(file)
#         n = len(df)
#         counts[file.split('/')[-1]] = [count, count+n]
#         count += n
#     pd.DataFrame(counts).to_csv(f'{sub}_counts.csv', index=False)

# make_count('train', train_files)
# make_count('test', test_files)

In [16]:
# from datasets import load_dataset

# dataset = load_dataset("csv", data_files={"train": train_files, "test": test_files})
# dataset.push_to_hub('power_line_lidar_data', token=)

Resolving data files:   0%|          | 0/34 [00:00<?, ?it/s]

In [9]:
# fig = plt.figure(figsize=(15, 10))
# ax = fig.add_subplot(111, projection='3d')

# ax.scatter(xyz_df['x'][:70000], xyz_df['y'][:70000], xyz_df['z'][:70000], s=1, alpha=0.5)

# # ax.set_title(row['class'])
# ax.set_xlabel("X")
# ax.set_ylabel("Y")
# ax.set_zlabel("Z")

# plt.show()