### Data Preprocessing with **pandas**

In [1]:
!pip install pandas



In [2]:
import os

# Create ../data (if it does not exist yet) and write a tiny house-price CSV
# into it. The literal "NA" entries are what pandas later reads back as
# missing values.
data_dir = os.path.join('..', 'data')
os.makedirs(data_dir, exist_ok=True)

data_file = os.path.join(data_dir, 'house_tiny.csv')
csv_text = '''NumRooms,RoofType,Price
NA,NA,127500
2,NA,106000
4,Slate,178100
NA,NA,140000'''

with open(data_file, 'w') as csv_out:
    csv_out.write(csv_text)

### Reading the Dataset

In [3]:
import pandas as pd

# Parse the CSV file written above into a DataFrame and show it as plain text.
data = pd.read_csv(filepath_or_buffer=data_file)
print(data)

   NumRooms RoofType   Price
0       NaN      NaN  127500
1       2.0      NaN  106000
2       4.0    Slate  178100
3       NaN      NaN  140000


### Data Preparation

In [4]:
# Split by column position: the first two columns are the features,
# the third column is the prediction target.
inputs, targets = data.iloc[:, 0:2], data.iloc[:, 2]

for part in (inputs, targets):
    print(part, "\n")

# One-hot encode the non-numeric column; dummy_na=True adds a separate
# indicator column for rows where the value is missing.
inputs = pd.get_dummies(data=inputs, dummy_na=True)

print(inputs)

   NumRooms RoofType
0       NaN      NaN
1       2.0      NaN
2       4.0    Slate
3       NaN      NaN 

0    127500
1    106000
2    178100
3    140000
Name: Price, dtype: int64 

   NumRooms  RoofType_Slate  RoofType_nan
0       NaN           False          True
1       2.0           False          True
2       4.0            True         False
3       NaN           False          True


In [5]:
# Summarize the encoded frame: per-column non-null counts, dtypes and memory usage.
inputs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   NumRooms        2 non-null      float64
 1   RoofType_Slate  4 non-null      bool   
 2   RoofType_nan    4 non-null      bool   
dtypes: bool(2), float64(1)
memory usage: 168.0 bytes


In [6]:
# Replace every missing entry with the mean of its own column.
column_means = inputs.mean()
inputs = inputs.fillna(value=column_means)
print(inputs)

   NumRooms  RoofType_Slate  RoofType_nan
0       3.0           False          True
1       2.0           False          True
2       4.0            True         False
3       3.0           False          True


### Conversion to the Tensor Format

In [13]:
# Every value must be numeric before the data can be converted to tensors.
import torch

# Step 1: pandas objects -> NumPy arrays of floats.
in_to_numpy = inputs.to_numpy(dtype=float)
ta_to_numpy = targets.to_numpy(dtype=float)

for arr in (in_to_numpy, ta_to_numpy):
    print(arr, "\n")

# Step 2: NumPy arrays -> PyTorch tensors.
x, y = torch.tensor(in_to_numpy), torch.tensor(ta_to_numpy)

for converted in (x, y):
    print(converted, "\n")

[[3. 0. 1.]
 [2. 0. 1.]
 [4. 1. 0.]
 [3. 0. 1.]] 

[127500. 106000. 178100. 140000.] 

tensor([[3., 0., 1.],
        [2., 0., 1.],
        [4., 1., 0.],
        [3., 0., 1.]], dtype=torch.float64) 

tensor([127500., 106000., 178100., 140000.], dtype=torch.float64) 



In [14]:
# Double y in place. This mutates the existing tensor, so running the cell
# again doubles the values a second time.
y.mul_(2)
y

tensor([255000., 212000., 356200., 280000.], dtype=torch.float64)