In [252]:
import pandas as pd
import boto3
import sagemaker
from sklearn.model_selection import train_test_split
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.inputs import TrainingInput


In [253]:
# Where the raw per-cell metrics table lives in S3.
bucket = 'cosyneraw'
data_key = 'cell_metrics.csv'
data_location = f's3://{bucket}/{data_key}'

# Read the CSV straight from the S3 URL; the bare `df` renders the frame.
df = pd.read_csv(data_location)
df

Unnamed: 0.1,Unnamed: 0,cell_id,Cre,area,expt_id,signal_correlation_dg,signal_correlation_nm3,number_cells,imaging_depth,age,sex,numbercells
0,0,598998745,Sst,VISl,597028936,0.351210,0.074539,,375.0,105.0,female,12.0
1,1,662056046,Sst,VISp,612044633,0.383254,0.139712,,265.0,98.0,male,8.0
2,2,662098491,Sst,VISpm,639117194,0.115936,0.115184,,375.0,98.0,male,7.0
3,3,606133820,Sst,VISpm,601273919,0.238630,0.082120,,275.0,117.0,female,12.0
4,4,662099083,Sst,VISl,593337825,0.327079,0.224116,,275.0,138.0,female,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...
111,111,585736587,Emx1,VISl,583136565,0.134566,0.032729,,175.0,104.0,male,49.0
112,112,578387999,Emx1,VISl,573261513,0.137884,0.045017,,275.0,87.0,male,327.0
113,113,570927169,Emx1,VISal,562536151,0.096853,0.069860,,375.0,111.0,female,123.0
114,114,605627126,Emx1,VISp,595263152,0.159767,0.060545,,375.0,122.0,male,221.0


In [254]:
# Columns removed from the frame before it is split and exported.
for unused_col in ('number_cells', 'Unnamed: 0', 'cell_id', 'expt_id', 'Cre', 'area'):
    del df[unused_col]

# Encode sex numerically: male -> 1, female -> 0.
sex_codes = {'male': 1, 'female': 0}
df['sex'] = df['sex'].map(sex_codes)

In [255]:
# Put the target column first in the frame that gets exported to CSV.
# NOTE(review): presumably required because the Linear Learner CSV input
# treats the first column as the label -- confirm against the AWS docs.
#
# The column is selected by name instead of rotating "whatever is last":
# the rotation silently moved a different column to the front whenever this
# cell was executed a second time, whereas this form is idempotent.
label_col = 'numbercells'
cols = [label_col] + [col for col in df.columns if col != label_col]
df = df[cols]

In [256]:
# How often each distinct target value occurs in the full data set.
df.loc[:, 'numbercells'].value_counts()

numbercells
52.0     4
21.0     3
82.0     3
105.0    3
12.0     2
        ..
8.0      1
115.0    1
232.0    1
286.0    1
221.0    1
Name: count, Length: 83, dtype: int64

In [257]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows; the seed is fixed so the split is repeatable.
train, test_and_validate = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

In [258]:
# Halve the held-out rows: the first part becomes `test`, the second `validate`.
test, validate = train_test_split(
    test_and_validate,
    test_size=0.5,
    random_state=42,
)

In [259]:
# (rows, columns) of each split, printed in train / test / validate order.
for split in (train, test, validate):
    print(split.shape)

(92, 6)
(12, 6)
(12, 6)


In [260]:
# Target-value frequencies per split, printed in train / test / validate order.
for split in (train, test, validate):
    print(split['numbercells'].value_counts())

numbercells
82.0     3
51.0     2
67.0     2
10.0     2
128.0    2
        ..
87.0     1
12.0     1
58.0     1
15.0     1
80.0     1
Name: count, Length: 70, dtype: int64
numbercells
12.0     1
95.0     1
237.0    1
41.0     1
21.0     1
52.0     1
284.0    1
15.0     1
140.0    1
65.0     1
103.0    1
105.0    1
Name: count, dtype: int64
numbercells
97.0     1
286.0    1
91.0     1
102.0    1
52.0     1
171.0    1
115.0    1
17.0     1
103.0    1
226.0    1
90.0     1
275.0    1
Name: count, dtype: int64


In [261]:
# Write every split to a local CSV without a header row or an index column.
for filename, frame in (
    ('train.csv', train),
    ('validate.csv', validate),
    ('test.csv', test),
):
    frame.to_csv(filename, header=False, index=False)

In [262]:
# S3 bucket and key prefix under which the split CSVs and model output are stored.
bucket, prefix = 'cosyneraw', 'ttv'

In [263]:
# Session used for the uploads below and handed to the estimator later on.
sess = sagemaker.Session()

# Each local CSV goes under its own sub-prefix of `prefix`; the values returned
# by upload_data are kept so later cells can point training inputs at them.
train_prefix = f'{prefix}/train'
validation_prefix = f'{prefix}/validate'
test_prefix = f'{prefix}/test'

s3_train = sess.upload_data('train.csv', bucket=bucket, key_prefix=train_prefix)
s3_validation = sess.upload_data('validate.csv', bucket=bucket, key_prefix=validation_prefix)
s3_test = sess.upload_data('test.csv', bucket=bucket, key_prefix=test_prefix)


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [264]:
from sagemaker import image_uris

# Resolve the IAM role from the notebook environment rather than embedding an
# account-specific ARN in the notebook (get_execution_role is already imported
# at the top of the notebook).
role = get_execution_role()

# `get_image_uri` is deprecated: the SDK prints "has been renamed in
# sagemaker>=2" when it is called.  `image_uris.retrieve` is the v2 replacement.
container = image_uris.retrieve(
    framework='linear-learner',
    region=boto3.Session().region_name,
)


See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [268]:
# Estimator for the Linear Learner container; model artifacts are written
# under the `output` sub-prefix of the project prefix.
linear = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path=f's3://{bucket}/{prefix}/output',
    sagemaker_session=sess,
)

# Regression on 5 feature columns, trained in mini-batches of 20 rows.
linear_hyperparameters = {
    'feature_dim': 5,
    'predictor_type': 'regressor',
    'mini_batch_size': 20,
}
linear.set_hyperparameters(**linear_hyperparameters)

In [269]:
# Both channels are CSV files, so the content type is declared explicitly.
train_input = TrainingInput(s3_train, content_type='text/csv')
validation_input = TrainingInput(s3_validation, content_type='text/csv')

# No 'test' channel is passed to the training job.
data_channels = {
    'train': train_input,
    'validation': validation_input,
}

# Launch the training job and wait for it to finish.
linear.fit(inputs=data_channels)

INFO:sagemaker:Creating training-job with name: linear-learner-2023-12-03-23-52-51-070


2023-12-03 23:52:51 Starting - Starting the training job...
2023-12-03 23:53:06 Starting - Preparing the instances for training.........
2023-12-03 23:54:24 Downloading - Downloading input data...
2023-12-03 23:55:09 Training - Downloading the training image.........
2023-12-03 23:56:50 Uploading - Uploading generated training model[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[12/03/2023 23:56:38 INFO 140583242454848] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'optimizer': 'auto', 'l

In [288]:
# numpy is not among the notebook's top-level imports, so `np` below raised a
# NameError on a fresh kernel; import it here next to the other cell-local import.
import numpy as np
from sagemaker.serializers import CSVSerializer

# Host the trained model on a real-time endpoint.
# NOTE: the endpoint keeps running after this cell; call
# `linear_predictor.delete_endpoint()` once it is no longer needed.
linear_predictor = linear.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")

# Requests are sent as CSV.
linear_predictor.serializer = CSVSerializer()

# Smoke test: one random feature vector of the same width the model was trained on.
num_features = 5
sample_data = np.random.rand(num_features)
result = linear_predictor.predict(sample_data, initial_args={"ContentType": "text/csv"})
print(result)

INFO:sagemaker:Creating model with name: linear-learner-2023-12-04-00-44-44-984
INFO:sagemaker:Creating endpoint-config with name linear-learner-2023-12-04-00-44-44-984
INFO:sagemaker:Creating endpoint with name linear-learner-2023-12-04-00-44-44-984


--------!b'{"predictions": [{"score": 47.478424072265625}]}'


In [1]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Placeholder inputs: y_true stands for the true labels and y_pred for the
# model predictions.
y_true = [0.35, 0.074, 375, 105, 0]  # Replace with your actual data
y_pred = [0.16, 0.038, 275, 113, 1]  # Replace with your actual data

# Evaluate the three regression metrics on the same pair of sequences.
mse, mae, r2 = (
    metric(y_true, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

for label, value in (
    ("MSE:", mse),
    ("Mean Absolute Error (MAE):", mae),
    ("R-squared (R2):", r2),
):
    print(label, value)

MSE: 2013.0074792
Mean Absolute Error (MAE): 21.8452
R-squared (R2): 0.9045865678444202


In [2]:
import sagemaker
from sagemaker import image_uris
from sagemaker.estimator import Estimator

# Get the Linear Learner image URI.
# `get_image_uri` is deprecated (the SDK prints "has been renamed in
# sagemaker>=2" when it is called); `image_uris.retrieve` is the v2 replacement.
linear_learner_image = image_uris.retrieve(
    framework='linear-learner',
    region=sagemaker.Session().boto_region_name,
)

# Define the estimator
estimator = Estimator(
    image_uri=linear_learner_image,
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path='s3://cosyneraw/ttv/output'
)

# Static hyperparameters shared by every tuning trial.  They mirror the first
# estimator in this notebook; without them the training jobs launched by the
# tuner would have no predictor_type.
# NOTE(review): predictor_type is documented as required for Linear Learner --
# confirm against the hyperparameter reference.
estimator.set_hyperparameters(predictor_type='regressor', feature_dim=5)


  from pandas.core.computation.check import NUMEXPR_INSTALLED


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [3]:
from sagemaker.tuner import IntegerParameter

# Search space for the tuner.  The training split printed earlier has only
# 92 rows, so the previous upper bound of 512 let the tuner pick batch sizes
# larger than the data set; the range is capped below the training row count.
# NOTE(review): Linear Learner is expected to reject a mini_batch_size larger
# than the number of training records -- confirm against the AWS docs.
hyperparameter_ranges = {
    'mini_batch_size': IntegerParameter(32, 64)
}


In [4]:
# The tuning job is given only 'train' and 'validation' channels, so the
# objective has to be a validation metric; 'test:mse' is reported only when a
# 'test' channel is supplied.
objective_metric_name = 'validation:mse'


In [5]:
from sagemaker.inputs import TrainingInput
from sagemaker.tuner import HyperparameterTuner

# Create a HyperparameterTuner object
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    max_jobs=1,  # Total number of training jobs
    max_parallel_jobs=1,  # Number of jobs to run in parallel
    objective_type='Minimize'  # Minimize MSE
)

# Start the tuning job.
# The channels are CSV files, so the content type is declared explicitly,
# exactly as the stand-alone training job earlier in this notebook does;
# bare S3 URI strings carry no content type.
# NOTE(review): both prefixes must already contain the uploaded CSVs; a
# ValidationException ("No S3 objects found") is raised otherwise.
tuner.fit({
    'train': TrainingInput('s3://cosyneraw/ttv/train', content_type='text/csv'),
    'validation': TrainingInput('s3://cosyneraw/ttv/validate', content_type='text/csv'),
})


No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config
No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config


ClientError: An error occurred (ValidationException) when calling the CreateHyperParameterTuningJob operation: No S3 objects found under S3 URL "s3://cosyneraw//ttv/train" given in input data source. Please ensure that the bucket exists in the selected region (us-east-1), that objects exist under that S3 prefix, and that the role "arn:aws:iam::153395555780:role/LabRole" has "s3:ListBucket" permissions on bucket "cosyneraw".