# Activity 8 | Linear Regression

Simple linear regression predicting Weight from Height and Sex: a multi-variable model with a one-hot encoded categorical feature.

Dataset with Height, Weight, Sex statistics from: 

https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_video/master/data/weight-height.csv

**Swift with SciKit Learn MinMax normalization**

Use Python/Pandas to import the dataset. Use scikit-learn to normalize the values with the MinMax scaler.
Based on https://github.com/JacopoMangiavacchi/Swift-TensorFlow-Sample-Notebooks

In [0]:
import Python
import TensorFlow

/// Downloads the weight/height CSV, one-hot encodes `Gender`, and min-max
/// normalizes the features and the target with scikit-learn.
///
/// The transformed data frame is printed as a side effect.
///
/// - Returns: A tuple `(x, y)` of NumPy arrays. `x` holds the scaled columns
///   `Height`, `Gender_Female`, `Gender_Male` (in that order); `y` holds the
///   scaled `Weight` column. Each is scaled independently by its own
///   `MinMaxScaler`.
func getNumpyNormalizedDataset() -> (PythonObject, PythonObject) {
    let numpy = Python.import("numpy")
    let pandas = Python.import("pandas")
    let io = Python.import("io")
    let requests = Python.import("requests")
    let preprocessing = Python.import("sklearn.preprocessing")

    let url = "https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_video/master/data/weight-height.csv"
    let response = requests.get(url)
    // Fail fast on an HTTP error instead of trying to parse an error page as CSV.
    response.raise_for_status()
    let df = pandas.read_csv(io.StringIO(response.content.decode("utf-8")))

    // One-hot encode the categorical column and put it next to the numeric ones.
    let dummies = pandas.get_dummies(df[["Gender"]])
    // `axis` is passed by keyword: recent pandas releases no longer accept it
    // positionally.
    let transformed = pandas.concat([df[["Height", "Weight"]], dummies], axis: 1)
    print(transformed)

    let X = transformed[["Height", "Gender_Female", "Gender_Male"]].values
    let Y = transformed[["Weight"]].values

    // Separate scalers so the fit for the features is not overwritten by the
    // fit for the target.
    let featureScaler = preprocessing.MinMaxScaler()
    let targetScaler = preprocessing.MinMaxScaler()
    let xNP = numpy.array(featureScaler.fit_transform(X))
    let yNP = numpy.array(targetScaler.fit_transform(Y))

    return (xNP, yNP)
}

In [0]:
/// A linear model made of a single dense layer with one output and an
/// identity activation.
struct LinearRegression: Layer {
    /// The dense layer mapping the input features to the single output.
    var l1: Dense<Float>

    /// Creates the model.
    ///
    /// - Parameter variables: Number of input features per sample.
    init(variables: Int = 1) {
        self.l1 = Dense<Float>(
            inputSize: variables,
            outputSize: 1,
            activation: identity
        )
    }

    /// Applies the dense layer to `input` and returns the result.
    @differentiable
    func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
        l1(input)
    }
}

In [50]:
// Fetch the normalized feature matrix and target column as NumPy arrays.
let (xNP, yNP) = getNumpyNormalizedDataset()

// Flatten each 2-D NumPy array row by row into a Swift [Float].
// The force unwrap crashes if an element cannot be converted to Float.
let xArray = xNP.tolist().flatMap { row in row.map { Float($0)! } }
let yArray = yNP.tolist().flatMap { row in row.map { Float($0)! } }

// Derive the tensor shapes from the data instead of hard-coding 10000 x 3,
// so a dataset with a different number of rows or features still loads.
// The target has one value per row, so its length is the sample count.
let sampleCount = yArray.count
precondition(sampleCount > 0, "The dataset contains no rows")
let featureCount = xArray.count / sampleCount

let x = Tensor<Float>(shape: [sampleCount, featureCount], scalars: xArray)
let y = Tensor<Float>(shape: [sampleCount, 1], scalars: yArray)

         Height  ...  Gender_Male
0     73.847017  ...            1
1     68.781904  ...            1
2     74.110105  ...            1
3     71.730978  ...            1
4     69.881796  ...            1
...         ...  ...          ...
9995  66.172652  ...            0
9996  67.067155  ...            0
9997  63.867992  ...            0
9998  69.034243  ...            0
9999  61.944246  ...            0

[10000 rows x 4 columns]


In [51]:
// Declare the model before the optimizer: the optimizer's initializer takes
// the model as an argument, so the model must already exist at that point.
var model = LinearRegression(variables: 3)
let optimizer = SGD(for: model, learningRate: 0.03)

// Full-batch gradient descent: every epoch evaluates the mean squared error
// over all of `x`/`y` and applies one optimizer step.
for epoch in 1...2000 {
    let (cost, 𝛁model) = model.valueWithGradient { m -> Tensor<Float> in
        let ŷ = m(x)
        return meanSquaredError(predicted: ŷ, expected: y)
    }
    optimizer.update(&model, along: 𝛁model)

    // Report progress every 100 epochs.
    if epoch % 100 == 0 {
        print("Epoch: \(epoch) Cost: \(cost)")
    }
}

Epoch: 100 Cost: 0.006617514
Epoch: 200 Cost: 0.00607133
Epoch: 300 Cost: 0.0056273188
Epoch: 400 Cost: 0.005236763
Epoch: 500 Cost: 0.0048931628
Epoch: 600 Cost: 0.0045908713
Epoch: 700 Cost: 0.0043249247
Epoch: 800 Cost: 0.004090949
Epoch: 900 Cost: 0.003885106
Epoch: 1000 Cost: 0.00370401
Epoch: 1100 Cost: 0.0035446854
Epoch: 1200 Cost: 0.0034045146
Epoch: 1300 Cost: 0.0032811984
Epoch: 1400 Cost: 0.0031727082
Epoch: 1500 Cost: 0.0030772602
Epoch: 1600 Cost: 0.002993288
Epoch: 1700 Cost: 0.0029194108
Epoch: 1800 Cost: 0.0028544152
Epoch: 1900 Cost: 0.0027972348
Epoch: 2000 Cost: 0.0027469285


In [53]:
// One sample in the training column order: Height, Gender_Female, Gender_Male.
let sample: Tensor<Float> = [[0.7, 0, 1]]
let prediction = model.inferring(from: sample)
print(prediction)

[[0.65355074]]
