# Import Libraries

In [None]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Importing the Solubility Dataset

In [None]:
# Remote CSV with the Delaney solubility data and its descriptor columns.
url = 'https://raw.githubusercontent.com/dataprofessor/data/master/delaney_solubility_with_descriptors.csv'

# Download and parse the CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=url)

# Bare expression as the last statement: the notebook renders the DataFrame.
df

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

# Dividing the Data into Dependent and Independent Features

In [None]:
# Every column except the last becomes the feature matrix; the last column is
# treated as the regression target. Both are taken as plain NumPy arrays.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

In [None]:
# Inspect the feature matrix (all columns of df except the last, as a NumPy array).
print(X)

In [None]:
# Inspect the target vector (the last column of df, as a NumPy array).
print(y)

# Data Preprocessing

# Checking for Missing Data

In [None]:
# Count the missing (NaN/None) entries in each column; all zeros means nothing to impute.
df.isna().sum()

# Exploratory Data Analysis

# Feature Scaling

In [None]:
# Standardise the features (zero mean, unit variance per column) with their own
# scaler so its fitted statistics stay available for transforming new inputs.
# NOTE(review): the scalers are fitted on the full dataset before the train/test
# split, so test-set statistics leak into the preprocessing — consider fitting
# on the training portion only.
x_scaler = StandardScaler()
X = x_scaler.fit_transform(X)

# The target gets a separate scaler: re-fitting the feature scaler on y would
# overwrite the feature statistics. y_scaler.inverse_transform can later map
# predictions back to the original target units.
y_scaler = StandardScaler()
# StandardScaler requires 2-D input, hence the reshape to a single column;
# ravel() restores the 1-D shape (n_samples,) that regressors expect for y.
y = y_scaler.fit_transform(y.reshape(-1, 1)).ravel()

# Backward compatibility: `scaler` used to end up fitted on the target, so keep
# that name bound to the target scaler for any code that relies on it.
scaler = y_scaler

# Splitting Data into Training and Test Dataset

In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Training the Model using SVR

In [None]:
# Support Vector Regression with a radial-basis-function kernel.
model = SVR(kernel='rbf')

# SVR.fit expects a 1-D target of shape (n_samples,). np.ravel guarantees that
# shape whether or not y_train arrives as an (n_samples, 1) column vector, which
# avoids scikit-learn's DataConversionWarning; it is a no-op for 1-D input.
model.fit(X_train, np.ravel(y_train))

# Predictions on Test Dataset

In [None]:
# Predict the held-out samples. The model was trained on a standardised target,
# so these predictions are in standardised units as well.
y_pred = model.predict(X_test)

# Bare expression as the last statement: the notebook displays the prediction array.
y_pred

# Model Performance

In [None]:
# Evaluate on the held-out set. Both metrics are computed on the standardised
# target, so the MSE is expressed in scaled units.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# `!s` applies str() to each value, the same conversion as explicit concatenation.
print(f'Mean squared error (MSE): {mse!s}')
print(f'R Squared: {r2!s}')

# Data Visualization

In [None]:
# Scatter of predicted vs. experimental values on a square canvas; points near
# the diagonal are good predictions. Both axes show the standardised values
# produced by the scaling step, not the raw target units.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(x=y_test, y=y_pred, c="#7CAE00", alpha=0.3)
ax.set_ylabel('Predicted LogS')
ax.set_xlabel('Experimental LogS')
plt.show()

# Save the Model

In [None]:
# Persist the trained model to disk. The context manager guarantees the file
# handle is flushed and closed even if pickling raises; the bare open() call
# used previously left the handle open.
# NOTE(review): only the model is saved. Inputs were standardised before
# training, so a consumer of this file also needs the fitted scaler(s) to
# prepare new data — consider pickling them alongside the model.
with open('solubility_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)