In [None]:

# Q12. Implement a Linear Regression problem. For example, based on a dataset comprising existing prices and areas/sizes of houses, predict the estimated price of a given house.

#Required imports
import pandas as pd
import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt

# Reading csv file to dataframe
df = pd.read_csv('houseprices.csv')
df.head() #----------o/p

# Scatter plot for the dataset
%matplotlib inline
plt.xlabel('area')
plt.ylabel('price')
plt.scatter(df.area,df.price,color='red',marker='+') #----------o/p

# Split the frame into the feature matrix and the target series

# Features: every column except 'price'
x_df = df.drop(columns='price')
x_df.head() #----------o/p

# Target: the 'price' column
price = df['price']
price #----------o/p

# Fit the linear model

# Train a LinearRegression estimator on (features, price)
reg = linear_model.LinearRegression()
reg.fit(x_df, price) #----------o/p

# Fitted coefficient(s) and intercept of y = m*x + c
m, c = reg.coef_, reg.intercept_
print('Coefficient, m = ', m)
print('Intercept, c = ', c) #----------o/p


# Predictions

# The model was fitted on a DataFrame, so each query is wrapped in a DataFrame
# that reuses the training column labels; passing a bare list makes
# scikit-learn warn that the input has no valid feature names.
ans1 = reg.predict(pd.DataFrame([[3300]], columns=x_df.columns))
print('(1) Price of a house with area = 3300 sqr ft: ', ans1) #----------o/p

# Cross-check: evaluate the fitted line y = m*x + c by hand
y = m*3300 + c
print('y = m*x + c =', y) #----------o/p

# The hand-computed y should equal ans1 (the original run reported 626751.51777971)
# Another prediction

ans2 = reg.predict(pd.DataFrame([[6000]], columns=x_df.columns))
print('(2) Price of a house with area = 6000 sqr ft: ', ans2) #----------o/p

# Same cross-check for the second query
y = m*6000 + c
print('y = m*x + c =', y) #----------o/p


### Visualising Best Fit Line

# Evaluate the fitted line over areas from 2000 to 5500
x = np.linspace(2000, 5500)
y = c + m*x

# Axis decoration is independent of the drawn artists, so set it up first
plt.title('Graph of Best fit line')
plt.xlabel('area')
plt.ylabel('price')
plt.grid()

# Draw the line (labelled for the legend), then overlay the raw data points
plt.plot(x, y, '-r', label='Best Fit Line')
plt.legend(loc='upper left')
plt.scatter(x=df['area'], y=df['price'], color='red', marker='+')
plt.show() #----------o/p

#========================================================================

# Q13. Perform Linear Regression based on multiple features/variables. For example, based on a number of additional features like number of bedrooms, servant room, number of balconies, and the number of years since the house was built – predict the price of a house.

#Required imports
import pandas as pd
import numpy as np
from sklearn import linear_model #----------o/p

# Load the multi-feature house-price data and display it
df = pd.read_csv('houseprices2.csv')
df #----------o/p

### Data Preprocessing: replace missing 'bedrooms' values with the column median

# Median used as the fill value
df['bedrooms'].median() #----------o/p

# Overwrite the column with its NA-filled version and display the result
df['bedrooms'] = df['bedrooms'].fillna(df['bedrooms'].median())
df #----------o/p

### Applying Linear Regression

# Every column except 'price' is a feature; 'price' is the target
reg = linear_model.LinearRegression()
reg.fit(df.drop(columns='price'), df['price']) #----------o/p

# Unpacking into three names requires the model to have exactly three features
c = reg.intercept_
m1, m2, m3 = reg.coef_
print(f'Coefficients, \n\tm1 = {m1}, \n\tm2 = {m2}, \n\tm3 = {m3}')
print('Intercept, c = ', c) #----------o/p

### Predictions

# Column labels the model was trained on (all columns except 'price', in
# order). Queries are wrapped in a DataFrame with these labels because
# scikit-learn warns when a model fitted on a DataFrame is given bare lists.
feature_cols = df.columns.drop('price')

ans1 = reg.predict(pd.DataFrame([[3000, 3, 40]], columns=feature_cols))
print('(1) Price of home with 3000 sqr ft area, 3 bedrooms, 40 year old: ', ans1) #----------o/p

# Cross-check: evaluate y = m1*x1 + m2*x2 + m3*x3 + c by hand
y1 = m1*3000 + m2*3 + m3*40 + c
print('\ty1 = m1*x1 + m2*x2 + m3*x3 + c =\n\t', y1) #----------o/p

# Store the second prediction in ans2 so the print below reports this query's
# result instead of a leftover ans2 from an earlier cell.
ans2 = reg.predict(pd.DataFrame([[2500, 5, 10]], columns=feature_cols))
print('(2) Price of home with 2500 sqr ft area, 5 bedrooms, 10 year old: ', ans2) #----------o/p

# Same cross-check for the second query
y1 = m1*2500 + m2*5 + m3*10 + c
print('\ty1 = m1*x1 + m2*x2 + m3*x3 + c =\n\t', y1) #----------o/p

#=================================================================================

# Q14. Implement a classification/ logistic regression problem. For example based on different features of students data, classify, whether a student is suitable for a particular activity. Based on the available dataset, a student can also implement another classification problem like checking whether an email is spam or not.

# Import and load digits dataset
from sklearn.datasets import load_digits
digits = load_digits()

# Import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt #----------o/p

# Plot 2D matrix data of digits
plt.gray()
for i in range(5):
    plt.matshow(digits.images[i]) #----------o/p

# Get the attributes/columns of digits dataset
dir(digits) #----------o/p

### Creating and training the logistic regression model

# Model with the iteration cap raised to 3000
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(max_iter=3000) #----------o/p

# Splitter for the train/test partition
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing (no random_state, so the split
# differs from run to run)
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.2,
)
#----------o/p

# Sizes of the four partitions
print(*map(len, (X_train, X_test, y_train, y_test))) #----------o/p

# Fit on the training partition
model.fit(X_train, y_train) #----------o/p

### Measuring accuracy of our model

# Mean accuracy on the held-out partition
model.score(X_test, y_test) #----------o/p

### Predictions

# Predicted labels for the first five samples of the full dataset
model.predict(digits.data[:5]) #----------o/p

# Predicted labels for the whole test partition
y_predicted = model.predict(X_test)
y_predicted #----------o/p

### Confusion matrix

# Tabulate true labels against predicted labels for the test partition
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=y_test, y_pred=y_predicted)
cm #----------o/p

# Draw the matrix as an annotated heatmap
import seaborn as sn
plt.figure(figsize=(10, 7))
sn.heatmap(data=cm, annot=True)
plt.xlabel('Predicted')
plt.ylabel('Truth') #----------o/p

#15. Use some function for regularization of dataset based on problem 14.

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn import metrics

# Load the dataset and report its (rows, columns) shape
data = pd.read_csv("dataset.csv")
print(data.shape)

# Remove both columns in a single call. Two separate `x = data.drop(...)`
# statements each start from `data`, so only the last one would take effect
# and 'User ID' (the target below) would remain among the features.
x = data.drop(['User ID', 'Gender'], axis=1)
# NOTE(review): 'User ID' is kept as the regression target from the original
# code; confirm this is the intended target column.
y = data['User ID']

# 70/30 train/test split with a fixed seed for reproducibility
x_train, x_test, y_train, y_test = train_test_split(x, y,test_size = 0.3, random_state=1)

# NOTE(review): LinearRegression applies no penalty term, while the exercise
# heading asks for regularization; Ridge/Lasso may be what is wanted - confirm.
linreg = LinearRegression()
linreg.fit(x_train, y_train)
print ("iNTERCEPT : ",linreg.intercept_)
print ("CO-EFFICIENT : ",linreg.coef_)

# Evaluate on the held-out split
y_pred = linreg.predict(x_test)
print("R-Square Value",r2_score(y_test,y_pred))
print("\n")
print ("mean_absolute_error :",metrics.mean_absolute_error(y_test, y_pred))
print("\n")
# Compute the MSE once and reuse it for the RMSE
mse = metrics.mean_squared_error(y_test, y_pred)
print ("mean_squared_error : ",mse)
print("\n")
print ("root_mean_squared_error : ",np.sqrt(mse))

#16. Use some function for neural networks, such as Stochastic Gradient Descent or the backpropagation
# algorithm, to predict the value of a variable based on the dataset of problem 14.

import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the data; features are columns 1..3 and the label is column 4 (by position)
data = pd.read_csv('dataset.csv')
y = data.iloc[:, 4].values
x = pd.DataFrame(data.iloc[:, 1:4].values)

# Replace the first feature column with integer label codes
lblen = LabelEncoder()
x.loc[:,0] = lblen.fit_transform(x.loc[:,0])

# 80/20 train/test split with a fixed seed
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=0)

# Fit the scaler on the training split only, then apply it to both splits
sc = StandardScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)

# Two hidden ReLU layers of 6 units each and a single sigmoid output unit
classi = Sequential([
    Dense(6, activation='relu', input_dim=3),
    Dense(6, activation='relu'),
    Dense(1, activation='sigmoid'),
])
classi.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
classi.fit(xtrain, ytrain, batch_size=2, epochs=5)

# Threshold the sigmoid outputs at 0.5 to obtain boolean class predictions
ypred = classi.predict(xtest) > 0.5
cmatrix = confusion_matrix(ytest, ypred)
print(cmatrix)
accuracy_score(ytest, ypred)

