In [1]:
using CSV
using DataFrames
using Random

using Base.Iterators: repeated, partition
import StatsBase.sample, StatsBase.Weights
using Flux
using Flux: onehot, onecold, onehotbatch
using Flux: crossentropy, throttle, params
using Zygote
using BSON, JLD2, Statistics
using CUDA

In [2]:

# Load data
data = CSV.read("fin_reviews2.csv", DataFrame)

# Set the random seed for reproducibility
Random.seed!(123)

# Determine the split ratios
train_ratio = 0.9
validation_ratio = 0.04
test_ratio = 0.06

# Calculate the split indices
n_samples = nrow(data)
train_index = Int(round(train_ratio * n_samples))
validation_index = Int(round((train_ratio + validation_ratio) * n_samples))

# Shuffle the data
shuffle!(data)

# Split the data
train_data = data[1:train_index, :]
validation_data = data[train_index+1:validation_index, :]
test_data = data[validation_index+1:end, :]

# Prepare the matrices for the models
X_train = Matrix(train_data[:, 8:end])
y_train = train_data.stars
X_valid = Matrix(validation_data[:, 8:end])
y_valid = validation_data.stars
X_test = Matrix(test_data[:, 8:end])
y_test = test_data.stars


3600-element Vector{Float64}:
 5.0
 5.0
 1.0
 5.0
 5.0
 4.0
 4.0
 5.0
 4.0
 1.0
 5.0
 1.0
 5.0
 ⋮
 4.0
 4.0
 5.0
 5.0
 5.0
 5.0
 5.0
 5.0
 1.0
 3.0
 4.0
 1.0

In [3]:
"""
using XGBoost

# Convert labels to zero-based for XGBoost
y_train_xgb = y_train .- 1
y_valid_xgb = y_valid .- 1
y_test_xgb = y_test .- 1

# Hyperparameter search setup
best_score = -Inf
best_params = nothing

dtrain = DMatrix(X_train, label=y_train_xgb)
dvalid = DMatrix(X_valid, label=y_valid_xgb)
dtest = DMatrix(X_test, label=y_test_xgb)

for eta in [0.01, 0.001]
    for max_depth in [1, 3, 6, 9]
        for num_round in [100, 200,300]
            param = (
                "eta" => eta, 
                "max_depth" => max_depth, 
                "objective" => "multi:softprob", 
                "num_class" => 5,
                "eval_metric" => "mlogloss" 
            )
            model = xgboost(dtrain; eta=eta,max_depth=max_depth, num_round=num_round)
            preds = round.(predict(model, dvalid))
           
            accuracy = sum(preds.== y_valid_xgb) / length(y_valid_xgb)
            if accuracy > best_score
                best_score = accuracy
                best_params = (param, num_round)
                @info "Hyperparameters: $param, Num Rounds: $num_round, Accuracy: $accuracy"
            end
        end
    end
end

# Train final model with best parameters
best_param, best_num_round = best_params
final_model = xgboost(dtrain; best_param, num_round=best_num_round)
preds = round.(predict(final_model, dtest))

accuracy = sum(preds .== y_test_xgb) / length(y_test_xgb)
println("XGBoost Accuracy on Test Set: ", accuracy)

using JLD2
@save "xgboost_model.jld2" own_final
"""

LoadError: UndefVarError: `param` not defined

In [4]:
using Flux
using Flux: onehot, onecold, onehotbatch, crossentropy, throttle, params
using Zygote
using BSON, JLD2, Statistics
using CUDA
using Random
using Base.Iterators: repeated, partition
import StatsBase.sample, StatsBase.Weights

# Check if GPU is available and set the device accordingly
use_cuda = true
if use_cuda && CUDA.functional()
    device = gpu
    @info "Training on GPU"
else
    device = cpu
    @info "Training on CPU"
end

# Transpose and move X_train to the correct device
X_train1 = transpose(X_train) |> device
batch_size = 32
X_valid1 = transpose(X_valid) |> device
X_test1 = transpose(X_test) |> device

# Convert y_train to onehot encoding and move to the correct device
y_train1 = Flux.onehotbatch(y_train, 1:5) |> device
y_valid1 = Flux.onehotbatch(y_valid, 1:5) |> device
y_test1 = Flux.onehotbatch(y_test, 1:5) |> device

# Create DataLoader and move to the correct device
loader = Flux.DataLoader((X_train1, y_train1), batchsize=batch_size, shuffle=true)

# Define the MLP model and move to the correct device
model = Chain(
    Dense(300, 128, relu),
    Dense(128, 64, relu),
    Dense(64, 5),
    softmax
) |> device

# Define the loss function
loss(model, X, y) = Flux.crossentropy(model(X), y)

# Setup the optimizer
opt = Flux.setup(Adam(0.004), model)

# Function to calculate accuracy
accuracy(X, y) = mean(onecold(model(X)) .== onecold(y))

# Training with validation
best_val_accuracy = 0.0
best_model_params = nothing
last_imp=0
for epoch in 1:491
    Flux.train!(loss, model, loader, opt)
    val_accuracy = accuracy(X_valid1, y_valid1)
    println("Epoch $epoch: Validation Accuracy = $val_accuracy")
    last_imp+=1
    if val_accuracy > best_val_accuracy
        best_val_accuracy = val_accuracy
        model_params = cpu(Flux.state(model))
        BSON.@save "bestmlp3layersown.bson" model_params
        last_imp=0
    end
    if last_imp>10
        break
    end
end



# Evaluate on the test set
test_accuracy = accuracy(X_test1, y_test1)
println("MLP Accuracy on Test Set: ", test_accuracy)


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mTraining on GPU


Epoch 1: Validation Accuracy = 0.6166666666666667
Epoch 2: Validation Accuracy = 0.6291666666666667
Epoch 3: Validation Accuracy = 0.6279166666666667
Epoch 4: Validation Accuracy = 0.6041666666666666
Epoch 5: Validation Accuracy = 0.6291666666666667
Epoch 6: Validation Accuracy = 0.6316666666666667
Epoch 7: Validation Accuracy = 0.6233333333333333
Epoch 8: Validation Accuracy = 0.6245833333333334
Epoch 9: Validation Accuracy = 0.6279166666666667
Epoch 10: Validation Accuracy = 0.6308333333333334
Epoch 11: Validation Accuracy = 0.6241666666666666
Epoch 12: Validation Accuracy = 0.64
Epoch 13: Validation Accuracy = 0.6304166666666666
Epoch 14: Validation Accuracy = 0.6295833333333334
Epoch 15: Validation Accuracy = 0.6316666666666667
Epoch 16: Validation Accuracy = 0.6375
Epoch 17: Validation Accuracy = 0.6258333333333334
Epoch 18: Validation Accuracy = 0.6329166666666667
Epoch 19: Validation Accuracy = 0.6354166666666666
Epoch 20: Validation Accuracy = 0.6370833333333333
Epoch 21: Valid

In [5]:
using Flux
using Flux: onehot, onecold, onehotbatch, crossentropy, throttle, params
using Zygote
using BSON, JLD2, Statistics
using CUDA
using Random
using Base.Iterators: repeated, partition
import StatsBase.sample, StatsBase.Weights

# Check if GPU is available and set the device accordingly
use_cuda = true
if use_cuda && CUDA.functional()
    device = gpu
    @info "Training on GPU"
else
    device = cpu
    @info "Training on CPU"
end

# Transpose and move X_train to the correct device
X_train1 = transpose(X_train) |> device
batch_size = 32
X_valid1 = transpose(X_valid) |> device
X_test1 = transpose(X_test) |> device

# Convert y_train to onehot encoding and move to the correct device
y_train1 = Flux.onehotbatch(y_train, 1:5) |> device
y_valid1 = Flux.onehotbatch(y_valid, 1:5) |> device
y_test1 = Flux.onehotbatch(y_test, 1:5) |> device

# Create DataLoader and move to the correct device
loader = Flux.DataLoader((X_train1, y_train1), batchsize=batch_size, shuffle=true)

# Define the MLP model and move to the correct device
model = Chain(
    Dense(300, 128, relu),
    Dense(128, 64, relu),
    Dense(64, 5),
    softmax
) |> device

# Define the loss function
loss(model, X, y) = Flux.crossentropy(model(X), y)

# Setup the optimizer
opt = Flux.setup(AdaGrad(0.004), model)

# Function to calculate accuracy
accuracy(X, y) = mean(onecold(model(X)) .== onecold(y))

# Training with validation
best_val_accuracy = 0.0
best_model_params = nothing
last_imp=0
for epoch in 1:491
    Flux.train!(loss, model, loader, opt)
    val_accuracy = accuracy(X_valid1, y_valid1)
    println("Epoch $epoch: Validation Accuracy = $val_accuracy")
      last_imp+=1
      if val_accuracy > best_val_accuracy
        best_val_accuracy = val_accuracy
        model_params = cpu(Flux.state(model))
        BSON.@save "bestmlp3layersownada.bson" model_params   
        last_imp=0
    end
    if last_imp>10
        break
    end
end



# Evaluate on the test set
test_accuracy = accuracy(X_test1, y_test1)
println("MLP Accuracy on Test Set: ", test_accuracy)


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mTraining on GPU


Epoch 1: Validation Accuracy = 0.61
Epoch 2: Validation Accuracy = 0.61625
Epoch 3: Validation Accuracy = 0.6183333333333333
Epoch 4: Validation Accuracy = 0.6195833333333334
Epoch 5: Validation Accuracy = 0.6225
Epoch 6: Validation Accuracy = 0.625
Epoch 7: Validation Accuracy = 0.6254166666666666
Epoch 8: Validation Accuracy = 0.6279166666666667
Epoch 9: Validation Accuracy = 0.6266666666666667
Epoch 10: Validation Accuracy = 0.6283333333333333
Epoch 11: Validation Accuracy = 0.62875
Epoch 12: Validation Accuracy = 0.6258333333333334
Epoch 13: Validation Accuracy = 0.6279166666666667
Epoch 14: Validation Accuracy = 0.62875
Epoch 15: Validation Accuracy = 0.6275
Epoch 16: Validation Accuracy = 0.6354166666666666
Epoch 17: Validation Accuracy = 0.6341666666666667
Epoch 18: Validation Accuracy = 0.6325
Epoch 19: Validation Accuracy = 0.63125
Epoch 20: Validation Accuracy = 0.6283333333333333
Epoch 21: Validation Accuracy = 0.6325
Epoch 22: Validation Accuracy = 0.6341666666666667
Epoch 

LoadError: InterruptException:

In [6]:
using Flux
using Flux: onehot, onecold, onehotbatch, crossentropy, throttle, params
using Zygote
using BSON, JLD2, Statistics
using CUDA
using Random
using Base.Iterators: repeated, partition
import StatsBase.sample, StatsBase.Weights

# Check if GPU is available and set the device accordingly
use_cuda = true
if use_cuda && CUDA.functional()
    device = gpu
    @info "Training on GPU"
else
    device = cpu
    @info "Training on CPU"
end

# Transpose and move X_train to the correct device
X_train1 = transpose(X_train) |> device
batch_size = 32
X_valid1 = transpose(X_valid) |> device
X_test1 = transpose(X_test) |> device

# Convert y_train to onehot encoding and move to the correct device
y_train1 = Flux.onehotbatch(y_train, 1:5) |> device
y_valid1 = Flux.onehotbatch(y_valid, 1:5) |> device
y_test1 = Flux.onehotbatch(y_test, 1:5) |> device

# Create DataLoader and move to the correct device
loader = Flux.DataLoader((X_train1, y_train1), batchsize=batch_size, shuffle=true)

# Define the MLP model and move to the correct device
model = Chain(
    Dense(300, 128, tanh),
    Dense(128, 64, tanh),
    Dense(64, 5),
    softmax
) |> device

# Define the loss function
loss(model, X, y) = Flux.crossentropy(model(X), y)

# Setup the optimizer
opt = Flux.setup(NADAM(0.004), model)

# Function to calculate accuracy
accuracy(X, y) = mean(onecold(model(X)) .== onecold(y))

# Training with validation
best_val_accuracy = 0.0
best_model_params = nothing
last_imp=0
for epoch in 1:491
    Flux.train!(loss, model, loader, opt)
    val_accuracy = accuracy(X_valid1, y_valid1)
    println("Epoch $epoch: Validation Accuracy = $val_accuracy")
    last_imp+=1
    if val_accuracy > best_val_accuracy
        best_val_accuracy = val_accuracy
        model_params = cpu(Flux.state(model))
        BSON.@save "bestmlp3layersownnestor.bson" model_params
           last_imp=0
    end
    if last_imp>10
        break
    end
end



# Evaluate on the test set
test_accuracy = accuracy(X_test1, y_test1)
println("MLP Accuracy on Test Set: ", test_accuracy)


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mTraining on GPU


Epoch 1: Validation Accuracy = 0.6216666666666667
Epoch 2: Validation Accuracy = 0.6225
Epoch 3: Validation Accuracy = 0.6166666666666667
Epoch 4: Validation Accuracy = 0.6345833333333334
Epoch 5: Validation Accuracy = 0.60875
Epoch 6: Validation Accuracy = 0.59875
Epoch 7: Validation Accuracy = 0.6325
Epoch 8: Validation Accuracy = 0.62625
Epoch 9: Validation Accuracy = 0.6291666666666667
Epoch 10: Validation Accuracy = 0.6195833333333334
Epoch 11: Validation Accuracy = 0.6325
Epoch 12: Validation Accuracy = 0.6095833333333334
Epoch 13: Validation Accuracy = 0.62625
Epoch 14: Validation Accuracy = 0.6275
Epoch 15: Validation Accuracy = 0.6329166666666667
MLP Accuracy on Test Set: 0.6366666666666667


In [7]:
using Flux
using Flux: onehot, onecold, onehotbatch, crossentropy, throttle, params
using Zygote
using BSON, JLD2, Statistics
using CUDA
using Random
using Base.Iterators: repeated, partition
import StatsBase.sample, StatsBase.Weights

# Check if GPU is available and set the device accordingly
use_cuda = true
if use_cuda && CUDA.functional()
    device = gpu
    @info "Training on GPU"
else
    device = cpu
    @info "Training on CPU"
end

# Transpose and move X_train to the correct device
X_train1 = transpose(X_train) |> device
batch_size = 32
X_valid1 = transpose(X_valid) |> device
X_test1 = transpose(X_test) |> device

# Convert y_train to onehot encoding and move to the correct device
y_train1 = Flux.onehotbatch(y_train, 1:5) |> device
y_valid1 = Flux.onehotbatch(y_valid, 1:5) |> device
y_test1 = Flux.onehotbatch(y_test, 1:5) |> device

# Create DataLoader and move to the correct device
loader = Flux.DataLoader((X_train1, y_train1), batchsize=batch_size, shuffle=true)

# Define the MLP model and move to the correct device
model= Chain(
  Dense(300 => 32, relu),
  Dense(32 => 5),
  softmax) |> device

# Define the loss function
loss(model, X, y) = Flux.crossentropy(model(X), y)

# Setup the optimizer
opt = Flux.setup(Adam(0.004), model)

# Function to calculate accuracy
accuracy(X, y) = mean(onecold(model(X)) .== onecold(y))

# Training with validation
best_val_accuracy = 0.0
best_model_params = nothing
last_imp=0
for epoch in 1:491
    Flux.train!(loss, model, loader, opt)
    val_accuracy = accuracy(X_valid1, y_valid1)
    println("Epoch $epoch: Validation Accuracy = $val_accuracy")
      last_imp+=1
    if val_accuracy > best_val_accuracy
        best_val_accuracy = val_accuracy
        model_params = cpu(Flux.state(model))
        BSON.@save "simpleownadam.bson" model_params 
         last_imp=0
    end
    if last_imp>10
        break
    end
end




# Evaluate on the test set
test_accuracy = accuracy(X_test1, y_test1)
println("MLP Accuracy on Test Set: ", test_accuracy)


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mTraining on GPU


Epoch 1: Validation Accuracy = 0.6045833333333334
Epoch 2: Validation Accuracy = 0.6120833333333333
Epoch 3: Validation Accuracy = 0.6266666666666667
Epoch 4: Validation Accuracy = 0.63125
Epoch 5: Validation Accuracy = 0.6325
Epoch 6: Validation Accuracy = 0.6366666666666667
Epoch 7: Validation Accuracy = 0.6145833333333334
Epoch 8: Validation Accuracy = 0.63
Epoch 9: Validation Accuracy = 0.6370833333333333
Epoch 10: Validation Accuracy = 0.63125
Epoch 11: Validation Accuracy = 0.6358333333333334
Epoch 12: Validation Accuracy = 0.63625
Epoch 13: Validation Accuracy = 0.6425
Epoch 14: Validation Accuracy = 0.6425
Epoch 15: Validation Accuracy = 0.6254166666666666
Epoch 16: Validation Accuracy = 0.6433333333333333
Epoch 17: Validation Accuracy = 0.6366666666666667
Epoch 18: Validation Accuracy = 0.6325
Epoch 19: Validation Accuracy = 0.6333333333333333
Epoch 20: Validation Accuracy = 0.6383333333333333
Epoch 21: Validation Accuracy = 0.63875
Epoch 22: Validation Accuracy = 0.62875
Epoc

In [8]:
using Flux
using Flux: onehot, onecold, onehotbatch, crossentropy, throttle, params
using Zygote
using BSON, JLD2, Statistics
using CUDA
using Random
using Base.Iterators: repeated, partition
import StatsBase.sample, StatsBase.Weights

# Check if GPU is available and set the device accordingly
use_cuda = true
if use_cuda && CUDA.functional()
    device = gpu
    @info "Training on GPU"
else
    device = cpu
    @info "Training on CPU"
end

# Transpose and move X_train to the correct device
X_train1 = transpose(X_train) |> device
batch_size = 32
X_valid1 = transpose(X_valid) |> device
X_test1 = transpose(X_test) |> device

# Convert y_train to onehot encoding and move to the correct device
y_train1 = Flux.onehotbatch(y_train, 1:5) |> device
y_valid1 = Flux.onehotbatch(y_valid, 1:5) |> device
y_test1 = Flux.onehotbatch(y_test, 1:5) |> device

# Create DataLoader and move to the correct device
loader = Flux.DataLoader((X_train1, y_train1), batchsize=batch_size, shuffle=true)

# Define the MLP model and move to the correct device
model= Chain(
  Dense(300 => 32, relu),
  Dense(32 => 5),
  softmax) |> device

# Define the loss function
loss(model, X, y) = Flux.crossentropy(model(X), y)

# Setup the optimizer
opt = Flux.setup(AdaGrad(0.004), model)

# Function to calculate accuracy
accuracy(X, y) = mean(onecold(model(X)) .== onecold(y))

# Training with validation
best_val_accuracy = 0.0
best_model_params = nothing
last_imp=0
for epoch in 1:491
    Flux.train!(loss, model, loader, opt)
    val_accuracy = accuracy(X_valid1, y_valid1)
    println("Epoch $epoch: Validation Accuracy = $val_accuracy")
      last_imp+=1
    if val_accuracy > best_val_accuracy
        best_val_accuracy = val_accuracy
        model_params = cpu(Flux.state(model))
        BSON.@save "simpleadaown.bson" model_params 
        last_imp=0
    end
    if last_imp>10
        break
    end
end




# Evaluate on the test set
test_accuracy = accuracy(X_test1, y_test1)
println("MLP Accuracy on Test Set: ", test_accuracy)


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mTraining on GPU


Epoch 1: Validation Accuracy = 0.5766666666666667
Epoch 2: Validation Accuracy = 0.5970833333333333
Epoch 3: Validation Accuracy = 0.6058333333333333
Epoch 4: Validation Accuracy = 0.61
Epoch 5: Validation Accuracy = 0.60875
Epoch 6: Validation Accuracy = 0.6125
Epoch 7: Validation Accuracy = 0.6154166666666666
Epoch 8: Validation Accuracy = 0.6166666666666667
Epoch 9: Validation Accuracy = 0.6154166666666666
Epoch 10: Validation Accuracy = 0.61375
Epoch 11: Validation Accuracy = 0.6179166666666667
Epoch 12: Validation Accuracy = 0.6158333333333333
Epoch 13: Validation Accuracy = 0.6183333333333333
Epoch 14: Validation Accuracy = 0.6175
Epoch 15: Validation Accuracy = 0.62
Epoch 16: Validation Accuracy = 0.6191666666666666
Epoch 17: Validation Accuracy = 0.6216666666666667
Epoch 18: Validation Accuracy = 0.6216666666666667
Epoch 19: Validation Accuracy = 0.6208333333333333
Epoch 20: Validation Accuracy = 0.6216666666666667
Epoch 21: Validation Accuracy = 0.6225
Epoch 22: Validation Acc

In [9]:
using Flux
using Flux: onehot, onecold, onehotbatch, crossentropy, throttle, params
using Zygote
using BSON, JLD2, Statistics
using CUDA
using Random
using Base.Iterators: repeated, partition
import StatsBase.sample, StatsBase.Weights

# Check if GPU is available and set the device accordingly
use_cuda = true
if use_cuda && CUDA.functional()
    device = gpu
    @info "Training on GPU"
else
    device = cpu
    @info "Training on CPU"
end

# Transpose and move X_train to the correct device
X_train1 = transpose(X_train) |> device
batch_size = 32
X_valid1 = transpose(X_valid) |> device
X_test1 = transpose(X_test) |> device

# Convert y_train to onehot encoding and move to the correct device
y_train1 = Flux.onehotbatch(y_train, 1:5) |> device
y_valid1 = Flux.onehotbatch(y_valid, 1:5) |> device
y_test1 = Flux.onehotbatch(y_test, 1:5) |> device

# Create DataLoader and move to the correct device
loader = Flux.DataLoader((X_train1, y_train1), batchsize=batch_size, shuffle=true)

# Define the MLP model and move to the correct device
model= Chain(
  Dense(300 => 32, relu),
  Dense(32 => 5),
  softmax) |> device


# Define the loss function
loss(model, X, y) = Flux.crossentropy(model(X), y)

# Setup the optimizer
opt = Flux.setup(NADAM(0.004), model)

# Function to calculate accuracy
accuracy(X, y) = mean(onecold(model(X)) .== onecold(y))

# Training with validation
best_val_accuracy = 0.0
best_model_params = nothing
last_imp=0
for epoch in 1:491
    Flux.train!(loss, model, loader, opt)
    val_accuracy = accuracy(X_valid1, y_valid1)
    println("Epoch $epoch: Validation Accuracy = $val_accuracy")
      last_imp+=1
    if val_accuracy > best_val_accuracy
        best_val_accuracy = val_accuracy
        model_params = cpu(Flux.state(model))
        BSON.@save "simplesownnestor.bson" model_params 
        last_imp=0
    end
    if last_imp>10
        break
    end
end




# Evaluate on the test set
test_accuracy = accuracy(X_test1, y_test1)
println("MLP Accuracy on Test Set: ", test_accuracy)


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mTraining on GPU


Epoch 1: Validation Accuracy = 0.6233333333333333
Epoch 2: Validation Accuracy = 0.6204166666666666
Epoch 3: Validation Accuracy = 0.6258333333333334
Epoch 4: Validation Accuracy = 0.6195833333333334
Epoch 5: Validation Accuracy = 0.6375
Epoch 6: Validation Accuracy = 0.6258333333333334
Epoch 7: Validation Accuracy = 0.6416666666666667
Epoch 8: Validation Accuracy = 0.6383333333333333
Epoch 9: Validation Accuracy = 0.6258333333333334
Epoch 10: Validation Accuracy = 0.6283333333333333
Epoch 11: Validation Accuracy = 0.6275
Epoch 12: Validation Accuracy = 0.6379166666666667
Epoch 13: Validation Accuracy = 0.635
Epoch 14: Validation Accuracy = 0.6291666666666667
Epoch 15: Validation Accuracy = 0.6304166666666666
Epoch 16: Validation Accuracy = 0.6325
Epoch 17: Validation Accuracy = 0.6370833333333333
Epoch 18: Validation Accuracy = 0.63375
MLP Accuracy on Test Set: 0.6433333333333333
