### Part 1: Equation fitting

In [1]:
# Import Libraries
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import pandas as pd
import numpy as np
from decimal import Decimal

In [2]:
# ดึงข้อมูลจากไฟล์ csv
df = pd.read_csv("Input.csv")
df.head()

Unnamed: 0,Y,X1,X2,X3
0,222000.0,1,6.3,2016
1,177000.0,1,5.5,2016
2,177777.0,1,5.5,2016
3,199999.0,1,5.5,2016
4,199999.0,1,5.5,2016


In [3]:
# สร้าง function สำหรับ print สมการ (อ่านข้ามส่วนนี้ไปได้)
# Function: Generate polynomial equation
def Poly_equation(feature, coef, intercept, power):
    '''
    Return fitted polynomial equation as a string
    
    feature: list of feature in dataset
    power: degree of each feature in each term (can get from poly.powers_)
    '''
    poly_string = ""
    
    for i in range(len(coef)): # create polynomial term
        
        #Coefficients
        if i == 0:
            term_string = "y = %.2E" % Decimal(coef[i])
        elif coef[i] >= 0: # add + sign in front of coef
            term_string = "+%.2E" % Decimal(coef[i])
        else:
            term_string = "%.2E" % Decimal(coef[i])
        
        #Powers
        feature_order = 0
        for power_iter in power[i]: # power for each feature
            if power_iter == 1 : #degree of that feature = 1
                term_string += '*' + str(feature[feature_order])
            elif power_iter > 1 : #degree of that feature > 1
                term_string += '*' + str(feature[feature_order]) + '^' + str(power_iter)
            feature_order += 1
        poly_string += term_string
    
    #Add intercept
    if intercept >= 0:
        poly_string += "+"
    poly_string += "%.2E" % Decimal(intercept)
    
    return poly_string

# Function: Generate log equation
def Log_equation(feature,a,b):
    # Desired equation: y = bln(x)+a
    log_string = "y = %.2E*ln(%s)" % (Decimal(b),feature)
    if a >= 0:
        log_string += "+"
    log_string += "%.2E" % Decimal(a)
    return log_string

# Function: Generate exponential equation
def Exp_equation(feature,d,e):
    # Desired equation: y = exp(e)*exp(d*x)
    exp_string = "y = %.2E*exp(%.2E*%s)" % (Decimal(np.exp(e)),Decimal(d),feature)
    return exp_string

# Function: Generate power equation
def Power_equation(feature,f,g):
    # Desired equation: y = exp(g)*x^f
    pow_string = "y = %.2E*%s^(%.2E)" % (Decimal(np.exp(g)),feature,Decimal(f))
    return pow_string

In [4]:
# แบ่ง dataframe เป็นตัวแปร X และ y
# ในตัวอย่างนี้จะใช้แค่ X1 ในการสร้างสมการ
X = df.iloc[:,1:2] #X input
y = df[df.columns[0]]


<b> Linear fitting (1 feature) </b> <br>
- create polynomial feature

In [5]:
nPoly = 3 #กำหนดให้เป็นสมการดีกรี 3
poly = PolynomialFeatures(degree=nPoly, include_bias=None)
# transform features to polynomial terms
X_ = poly.fit_transform(X) 
print("Polynomial degree: "+str(nPoly))
print("Degree for each feature:\n" + str(poly.powers_))

Polynomial degree: 3
Degree for each feature:
[[1]
 [2]
 [3]]


นั่นคือจะได้มา 3 พจน์ได้แก่ x, x^2 และ x^3

- linear regression หาค่าคงที่ที่ทำให้สมการฟิตกับข้อมูล

In [6]:
# Linear fitting: y = ax^3 + bx^2 + cx + d
lg = LinearRegression()
lg.fit(X_, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [7]:
# print สมการและค่า R-square
print("Equation: " + Poly_equation('X1', lg.coef_, lg.intercept_, poly.powers_))
print("R-Square = "+ str(np.round(lg.score(X_,y),3))+"\n")

Equation: y = -3.85E+02*X+9.05E-01*X^2-5.66E-04*X^3+5.46E+04
R-Square = 0.326



<b> Exponential fitting </b> <br>
สำหรับสมการในรูปแบบ Exponential, Natural log และ Power จะใช้ฟังก์ชัน polyfit ของ numpy ในการหาค่าคงที่ <br>
เพิ่มเติม: ใส่ try and except ให้โค้ดสามารถรันต่อไปได้เมื่อมี error เกิดขึ้นเมื่อไม่สามารถ fit สมการ expo, log หรือ power ได้

In [8]:
#Exponential fitting: y = a*exp(b*X)
try:
    d,e = np.polyfit(X.loc[:,'X1'], np.log(y), 1) # input for polyfit must be 1D numpy array
    print(Exp_equation('X1',d,e))
except:
    print("Contain error in exponential fitting")

y = 2.93E+04*exp(-5.33E-03*X1)


<b> Natural log fitting </b>

In [9]:
# Natural log fitting: y = a+bln(X)
try:
    b,a = np.polyfit(X.loc[:,'X1'], y, 1)
    print(Log_equation('X1',a,b))
except:
    print("Contain error in log fitting")

y = -1.18E+02*ln(X1)+3.90E+04


<b> Power fitting </b>

In [10]:
#Power fitting: y = aX^b
try:
    f,g = np.polyfit(X.loc[:,'X1'],np.log(y),1)
    print(Power_equation('X1',f,g))
except:
    print("Contain error in power fitting")

y = 2.93E+04*X1^(-5.33E-03)


<b> Linear fitting (more than 1 feature) </b>

In [11]:
# ในตัวอย่างนี้จะใช้ X1 และ X2 ในการสร้างสมการ
XX = df.iloc[:,1:3]

In [12]:
nPoly = 3
poly = PolynomialFeatures(degree=nPoly, include_bias=None)
# transform features to polynomial terms
XX_ = poly.fit_transform(XX)
print("Polynomial degree: "+str(nPoly))
print("Degree for each feature(X1,X2):\n" + str(poly.powers_))

Polynomial degree: 3
Degree for each feature(X1,X2):
[[1 0]
 [0 1]
 [2 0]
 [1 1]
 [0 2]
 [3 0]
 [2 1]
 [1 2]
 [0 3]]


นั่นคือจะได้มา 9 พจน์ได้แก่ x1, x2, x1^2, x1\*x2, x2^2, x1^3, x1^2\*x2, x1\*x2^2 และ x2^3

In [13]:
# Linear fitting
lg = LinearRegression()
lg.fit(XX_, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [14]:
# print สมการและค่า R-square
print("Equation: " + Poly_equation(['X1','X2'], lg.coef_, lg.intercept_, poly.powers_))
print("\n R-Square = "+ str(np.round(lg.score(XX_,y),3))+"\n")

Equation: y = 6.78E+01*X1+2.39E+04*X2+1.72E-01*X1^2-1.63E+02*X1*X2+2.06E+03*X2^2-6.79E-04*X1^3+3.06E-01*X1^2*X2-5.72E+00*X1*X2^2-2.14E+02*X2^3-1.61E+04

 R-Square = 0.639

