# About data

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt



* dataset is from CIBIL Bureau which consists of customers data
* there are 61 independent feature and 1 dependent feature
* dependent feature consists of 4 classes --> P1, P2, P3, P4
* P1 is given to customer with very good credit score
* P2 is given to customer which is good but not as good as P1 and so on.
* Loan is also called TL (Trade line) in bank language.
* Secured loans are something which is backed by a collateral while unsecured is not backed by anything.
* This is why unsecured loans has more interest rates compared to secured loans

In [112]:
internal_df = pd.read_excel('case_study1.xlsx')
internal_df.head(2)

Unnamed: 0,PROSPECTID,Total_TL,Tot_Closed_TL,Tot_Active_TL,Total_TL_opened_L6M,Tot_TL_closed_L6M,pct_tl_open_L6M,pct_tl_closed_L6M,pct_active_tl,pct_closed_tl,...,CC_TL,Consumer_TL,Gold_TL,Home_TL,PL_TL,Secured_TL,Unsecured_TL,Other_TL,Age_Oldest_TL,Age_Newest_TL
0,1,5,4,1,0,0,0.0,0.0,0.2,0.8,...,0,0,1,0,4,1,4,0,72,18
1,2,1,0,1,0,0,0.0,0.0,1.0,0.0,...,0,1,0,0,0,0,1,0,7,7


* Total_TL = total loans
* L6M = loans in last 6 months
* pct_tl = percentage of loan

In [110]:
external_df = pd.read_excel('case_study2.xlsx')
external_df.head(2)

Unnamed: 0,PROSPECTID,time_since_recent_payment,time_since_first_deliquency,time_since_recent_deliquency,num_times_delinquent,max_delinquency_level,max_recent_level_of_deliq,num_deliq_6mts,num_deliq_12mts,num_deliq_6_12mts,...,pct_CC_enq_L6m_of_L12m,pct_PL_enq_L6m_of_ever,pct_CC_enq_L6m_of_ever,max_unsec_exposure_inPct,HL_Flag,GL_Flag,last_prod_enq2,first_prod_enq2,Credit_Score,Approved_Flag
0,1,549,35,15,11,29,29,0,0,0,...,0.0,0.0,0.0,13.333,1,0,PL,PL,696,P2
1,2,47,-99999,-99999,0,-99999,0,0,0,0,...,0.0,0.0,0.0,0.86,0,0,ConsumerLoan,ConsumerLoan,685,P2


In [4]:
from sklearn.metrics import r2_score
from scipy.stats import chi2_contingency
from statsmodels.stats.outliers_influence import variance_inflation_factor

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support

import os
import warnings
warnings.filterwarnings('ignore')

In [5]:
df1 = internal_df.copy()
df2 = external_df.copy()

In [6]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51336 entries, 0 to 51335
Data columns (total 26 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   PROSPECTID            51336 non-null  int64  
 1   Total_TL              51336 non-null  int64  
 2   Tot_Closed_TL         51336 non-null  int64  
 3   Tot_Active_TL         51336 non-null  int64  
 4   Total_TL_opened_L6M   51336 non-null  int64  
 5   Tot_TL_closed_L6M     51336 non-null  int64  
 6   pct_tl_open_L6M       51336 non-null  float64
 7   pct_tl_closed_L6M     51336 non-null  float64
 8   pct_active_tl         51336 non-null  float64
 9   pct_closed_tl         51336 non-null  float64
 10  Total_TL_opened_L12M  51336 non-null  int64  
 11  Tot_TL_closed_L12M    51336 non-null  int64  
 12  pct_tl_open_L12M      51336 non-null  float64
 13  pct_tl_closed_L12M    51336 non-null  float64
 14  Tot_Missed_Pmnt       51336 non-null  int64  
 15  Auto_TL            

# Dealing with null values
* -99999 means null values

In [7]:
df1[df1.eq(-99999).any(axis = 1)].shape

(40, 26)

In [8]:
df1 = df1[~df1.eq(-99999).any(axis = 1)]

In [9]:
df1.shape

(51296, 26)

In [10]:
columns_to_be_removed = []

for i in range(0, df2.shape[1]):
    if df2[df2.iloc[:, i] == -99999].shape[0] > 10000:
        columns_to_be_removed.append(i)

print(columns_to_be_removed)

[2, 3, 5, 10, 11, 47, 49, 55]


In [11]:
df2 = df2.drop(df2.iloc[:, columns_to_be_removed], axis = 1)

In [12]:
df2.shape

(51336, 54)

In [13]:
for i in df2.columns:
    df2 = df2[~df2.eq(-99999).any(axis = 1)]

df2.shape

(42066, 54)

In [14]:
# df1.isna().sum()

In [15]:
# df2.isna().sum()

In [16]:
df = pd.merge(df1, df2, how = 'inner', left_on = ['PROSPECTID'], right_on = ['PROSPECTID'])

In [17]:
df.shape

(42064, 79)

In [111]:
df.sample()

Unnamed: 0,PROSPECTID,Total_TL,Tot_Closed_TL,Tot_Active_TL,Total_TL_opened_L6M,Tot_TL_closed_L6M,pct_tl_open_L6M,pct_tl_closed_L6M,pct_active_tl,pct_closed_tl,...,pct_PL_enq_L6m_of_L12m,pct_CC_enq_L6m_of_L12m,pct_PL_enq_L6m_of_ever,pct_CC_enq_L6m_of_ever,HL_Flag,GL_Flag,last_prod_enq2,first_prod_enq2,Credit_Score,Approved_Flag
23681,28927,2,0,2,0,0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0,0,ConsumerLoan,others,691,P2


In [19]:
df.isna().sum().sum()

0

# Dealing with categorical columns

In [20]:
for i in df.columns:
    if df[i].dtype == 'object':
        print(i)

MARITALSTATUS
EDUCATION
GENDER
last_prod_enq2
first_prod_enq2
Approved_Flag


In [21]:
for i in df.columns:
    if df[i].dtype == 'object':
        print(df[i].value_counts())
        print()
        print('*'*20)

MARITALSTATUS
Married    30886
Single     11178
Name: count, dtype: int64

********************
EDUCATION
GRADUATE          14140
12TH              11703
SSC                7241
UNDER GRADUATE     4572
OTHERS             2291
POST-GRADUATE      1898
PROFESSIONAL        219
Name: count, dtype: int64

********************
GENDER
M    37345
F     4719
Name: count, dtype: int64

********************
last_prod_enq2
ConsumerLoan    16480
others          13653
PL               7553
CC               2195
AL               1353
HL                830
Name: count, dtype: int64

********************
first_prod_enq2
others          20640
ConsumerLoan    11075
PL               4431
AL               2641
CC               1988
HL               1289
Name: count, dtype: int64

********************
Approved_Flag
P2    25452
P3     6440
P4     5264
P1     4908
Name: count, dtype: int64

********************


* first we will work on Marital Status
* marital status has 2 value_counts --> single and married
* now we cannot encode this to 0 and 1 because these are not ORDINAL values
* ordinal values are --> small, medium, large or low and high

# We will perform Hypothesis testing
Question - Are these two columns associated ?

* `Null Hypothesis (H0)` --> this two are not associated
* `Alternate Hypothesis (H1)` --> this two are associated

* `Alpha --> significance level`, usually it is 5% or 0.05, This threshold tells us, main kitna galat ho sakta hoon.

* `Confidence level` --> point estimate and margin of error (alpha), 1 - alpha

* We need to find evidence to support the alternate hypothesis which we do using p-value.
* If we don't have enough evidence then we can say that we don't have enough evidence to support alternate hypothesis, and if we have enough evidence then we reject the null hypothesis.

* `p-value` is calculated using tests, tests like t-test, chi-square test, Anova.

* `if p-value <= alpha --> Reject H0 and if p-value >= alpha --> fail to reject the H0`

# TESTS
* <b>Chisquare</b> - categorical v categorical
  
  `Ex : marital status vs loan approved or not`
  
* <b>t-test</b> - categorical v numerical (2 categorical columns) --> only 2 value_counts

  `Ex : Age vs loan approved or not`
  
* <b>ANOVA</b> - categorical v numerical (>=3 categorical columns) --> more than or equal to 3 value_counts
  
  `Ex : Age vs p1, p2, p3, p4`

# Feature selection 
#### Categorical columns

In [22]:
# Chisquare test

for i in ['MARITALSTATUS', 'EDUCATION', 'GENDER', 'last_prod_enq2', 'first_prod_enq2']:
    chi2, pval, _, _ = chi2_contingency(pd.crosstab(df[i], df['Approved_Flag']))
    print(i, '---', pval)

MARITALSTATUS --- 3.578180861038862e-233
EDUCATION --- 2.6942265249737532e-30
GENDER --- 1.907936100186563e-05
last_prod_enq2 --- 0.0
first_prod_enq2 --- 7.84997610555419e-287


* Since all the categorical columns have p-value less than 0.05, we will select all of them

#### Numerical columns

In [23]:
numeric_col = []

for i in df.columns:
    if df[i].dtype != 'object' and i not in ['PROSPECTID', 'Approved_Flag']:
        numeric_col.append(i)

len(numeric_col)

72

* Before checking columns with output col, first we need to check multi-collinearity or VIF
* `Multi-collinearity tells us predictibility of each features by other features.`
* `Correlation tells us the intensity of the relationship also negative or positive, specific to linear relationships between columns.`
* In convex functions, correlation gives misleading values.

# VIF
* Variance inflation factor
* used to identify multicollinearity
* takes r-squared value and eliminate if crosses the threshold
* `1/ 1 - r-squared`
* `VIF = 1 --> no multicollinearity`
* `VIF = 1 - 5 --> low multicollinearity`
* `VIF = 5 - 10 --> moderate multicollinearity`
* `VIF > 10 --> High multicollinearity`

In [24]:
# VIF - sequential method

vif_data = df[numeric_col]
total_columns = vif_data.shape[1]
columns_to_be_kept = []
column_index = 0

for i in range (0,total_columns):
    
    vif_value = variance_inflation_factor(vif_data, column_index)
    print (column_index,'---',vif_value)
    
    
    if vif_value <= 6:
        columns_to_be_kept.append( numeric_col[i] )
        column_index = column_index+1
    
    else:
        vif_data = vif_data.drop([ numeric_col[i] ] , axis=1)


0 --- inf
0 --- inf
0 --- 11.320180023967996
0 --- 8.363698035000336
0 --- 6.520647877790928
0 --- 5.149501618212625
1 --- 2.6111110405797344
2 --- inf
2 --- 1788.7926256209232
2 --- 8.601028256477228
2 --- 3.832800792153077
3 --- 6.099653381646731
3 --- 5.581352009642731
4 --- 1.9855843530987776
5 --- inf
5 --- 4.80953830281934
6 --- 23.270628983464636
6 --- 30.595522588100053
6 --- 4.384346405965583
7 --- 3.064658415523423
8 --- 2.898639771299251
9 --- 4.377876915347319
10 --- 2.207853583695841
11 --- 4.916914200506864
12 --- 5.214702030064725
13 --- 3.386162502423145
14 --- 7.840583309478997
14 --- 5.255034641721434
15 --- inf
15 --- 7.380634506427238
15 --- 1.4210050015175735
16 --- 8.083255010190316
16 --- 1.6241227524040114
17 --- 7.257811920140003
17 --- 15.59624383268298
17 --- 1.8258570471324307
18 --- 1.5080839450032664
19 --- 2.1720888348245757
20 --- 2.623397553527229
21 --- 2.295997081210618
22 --- 7.360578319196433
22 --- 2.160238777310255
23 --- 2.8686288267891475
24 ---

In [25]:
len(columns_to_be_kept)

39

# ANOVA test

In [26]:
from scipy.stats import f_oneway

columns_to_be_kept_numerical = []

for i in columns_to_be_kept:
    a = list(df[i])  
    b = list(df['Approved_Flag'])  
    
    group_P1 = [value for value, group in zip(a, b) if group == 'P1']
    group_P2 = [value for value, group in zip(a, b) if group == 'P2']
    group_P3 = [value for value, group in zip(a, b) if group == 'P3']
    group_P4 = [value for value, group in zip(a, b) if group == 'P4']


    f_statistic, p_value = f_oneway(group_P1, group_P2, group_P3, group_P4)

    if p_value <= 0.05:
        columns_to_be_kept_numerical.append(i)

In [27]:
len(columns_to_be_kept_numerical)

37

# Example for Chi2 and ANOVA

In [28]:
a = [1, 2, 3]
b = [4, 5, 6]

print(list(zip(a, b)))

[(1, 4), (2, 5), (3, 6)]


In [29]:
# Example of chi2

a = ['cat', 'cat', 'cat', 'dog', 'dog', 'dog', 'mouse', 'mouse', 'mouse']
b = ['p1', 'p1', 'p1', 'p2', 'p2', 'p2', 'p3', 'p3', 'p3']

In [30]:
df_test = pd.DataFrame({'a' : a, 'b' : b})

In [31]:
df_test

Unnamed: 0,a,b
0,cat,p1
1,cat,p1
2,cat,p1
3,dog,p2
4,dog,p2
5,dog,p2
6,mouse,p3
7,mouse,p3
8,mouse,p3


In [32]:
chi2, pval, _, _ = chi2_contingency(pd.crosstab(df_test['a'], df_test['b']))
print(pval)

0.0012340980408667957


In [33]:
if pval < 0.05:
    print('We will reject H0, there is association')
else:
    print('We dont have enough evidence to support H1, there is not any association')

We will reject H0, there is association


In [34]:
# Example of chi2

a1 = ['cat', 'cat', 'cat', 'dog', 'dog', 'dog', 'mouse', 'mouse', 'mouse']
b1 = ['p1', 'p1', 'p2', 'p3', 'p2', 'p3', 'p3', 'p2', 'p1']

In [35]:
df_test_1 = pd.DataFrame({'a1' : a1, 'b1' : b1})
df_test_1

Unnamed: 0,a1,b1
0,cat,p1
1,cat,p1
2,cat,p2
3,dog,p3
4,dog,p2
5,dog,p3
6,mouse,p3
7,mouse,p2
8,mouse,p1


In [36]:
chi2_1, pval_1, _, _ = chi2_contingency(pd.crosstab(df_test_1['a1'], df_test_1['b1']))
print(pval_1)

0.40600584970983794


In [37]:
if pval_1 < 0.05:
    print('We will reject H0, there is association')
else:
    print('We dont have enough evidence to support H1, there is not any association')

We dont have enough evidence to support H1, there is not any association


In [38]:
# Example of ANOVA Test

a = [10, 10, 11, 12, 12, 12, 15, 15, 15]
b = ['p1', 'p1', 'p2', 'p2', 'p2', 'p2', 'p3', 'p3', 'p3']

In [39]:
grp1 = [val for val, grp in zip(a, b) if grp == 'p1']
grp2 = [val for val, grp in zip(a, b) if grp == 'p2']
grp3 = [val for val, grp in zip(a, b) if grp == 'p3']

In [40]:
f_statistic, p_value = f_oneway(grp1, grp2, grp3)

In [41]:
print(p_value)

1.0525898285282773e-05


In [42]:
if p_value < 0.05:
    print('We will reject H0, there is association')
else:
    print('We dont have enough evidence to support H1, there is not any association')

We will reject H0, there is association


In [43]:
df.shape

(42064, 79)

In [44]:
print(df['MARITALSTATUS'].unique())  
print(df['EDUCATION'].unique())
print(df['GENDER'].unique())
print(df['last_prod_enq2'].unique())
print(df['first_prod_enq2'].unique())

['Married' 'Single']
['12TH' 'GRADUATE' 'SSC' 'POST-GRADUATE' 'UNDER GRADUATE' 'OTHERS'
 'PROFESSIONAL']
['M' 'F']
['PL' 'ConsumerLoan' 'AL' 'CC' 'others' 'HL']
['PL' 'ConsumerLoan' 'others' 'AL' 'HL' 'CC']


#### Ordinal feature -- EDUCATION
* SSC            : 1
* 12TH           : 2
* GRADUATE       : 3
* UNDER GRADUATE : 3
* POST-GRADUATE  : 4
* OTHERS         : 1
* PROFESSIONAL   : 3

In [45]:
df['EDUCATION'] = df['EDUCATION'].map({'SSC' : 1, '12TH' : 2, 'GRADUATE' : 3, 'UNDER GRADUATE' : 3,
                                       'POST-GRADUATE' : 4, 'OTHERS' : 1, 'PROFESSIONAL' : 3}) 

In [46]:
df['EDUCATION'].value_counts()

EDUCATION
3    18931
2    11703
1     9532
4     1898
Name: count, dtype: int64

In [47]:
df_encoded = pd.get_dummies(df, columns=['MARITALSTATUS','GENDER', 'last_prod_enq2' ,'first_prod_enq2'])

In [48]:
df_encoded.info()
k = df_encoded.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42064 entries, 0 to 42063
Data columns (total 91 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   PROSPECTID                    42064 non-null  int64  
 1   Total_TL                      42064 non-null  int64  
 2   Tot_Closed_TL                 42064 non-null  int64  
 3   Tot_Active_TL                 42064 non-null  int64  
 4   Total_TL_opened_L6M           42064 non-null  int64  
 5   Tot_TL_closed_L6M             42064 non-null  int64  
 6   pct_tl_open_L6M               42064 non-null  float64
 7   pct_tl_closed_L6M             42064 non-null  float64
 8   pct_active_tl                 42064 non-null  float64
 9   pct_closed_tl                 42064 non-null  float64
 10  Total_TL_opened_L12M          42064 non-null  int64  
 11  Tot_TL_closed_L12M            42064 non-null  int64  
 12  pct_tl_open_L12M              42064 non-null  float64
 13  p

# Model fitting 

# Random Forest

In [49]:
x = df_encoded. drop ( ['Approved_Flag'], axis = 1 )
y = df_encoded['Approved_Flag']

In [50]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

In [51]:
rf_classifier = RandomForestClassifier(n_estimators = 200, random_state=42)
rf_classifier.fit(x_train, y_train)

In [52]:
y_pred = rf_classifier.predict(x_test)

In [53]:
accuracy = accuracy_score(y_test, y_pred)
print ()
print(f'Accuracy: {accuracy}')
print ()
precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)


for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision: {precision[i]}")
    print(f"Recall: {recall[i]}")
    print(f"F1 Score: {f1_score[i]}")
    print()


Accuracy: 0.9900154522762391

Class p1:
Precision: 0.9465290806754222
Recall: 0.995069033530572
F1 Score: 0.9701923076923077

Class p2:
Precision: 0.9954617205998422
Recall: 1.0
F1 Score: 0.9977256995945812

Class p3:
Precision: 0.9968102073365231
Recall: 0.9433962264150944
F1 Score: 0.9693679720822024

Class p4:
Precision: 1.0
Recall: 0.9961127308066083
F1 Score: 0.9980525803310614



# xgboost

In [77]:
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

In [78]:
xgb_classifier = xgb.XGBClassifier(objective='multi:softmax',  num_class=4)

In [79]:
x = df_encoded.drop ( ['Approved_Flag'], axis = 1 )
y = df_encoded['Approved_Flag']

In [80]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [81]:
x_train, x_test, y_train, y_test = train_test_split(x, y_encoded, test_size=0.2, random_state=42)

In [82]:
xgb_classifier.fit(x_train, y_train)
y_pred = xgb_classifier.predict(x_test)

In [83]:
accuracy = accuracy_score(y_test, y_pred)
print ()
print(f'Accuracy: {accuracy:.2f}')
print ()

precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)

for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision: {precision[i]}")
    print(f"Recall: {recall[i]}")
    print(f"F1 Score: {f1_score[i]}")
    print()


Accuracy: 1.00

Class p1:
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Class p2:
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Class p3:
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Class p4:
Precision: 1.0
Recall: 1.0
F1 Score: 1.0



# Decision Tree

In [61]:
from sklearn.tree import DecisionTreeClassifier

x = df_encoded.drop ( ['Approved_Flag'], axis = 1 )
y = df_encoded['Approved_Flag']

In [62]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [63]:
dt_model = DecisionTreeClassifier(max_depth=20, min_samples_split=10)
dt_model.fit(x_train, y_train)

In [64]:
y_pred = dt_model.predict(x_test)

In [65]:
accuracy = accuracy_score(y_test, y_pred)
print ()
print(f"Accuracy: {accuracy:.2f}")
print ()

precision, recall, f1_score, _ = precision_recall_fscore_support(y_test, y_pred)

for i, v in enumerate(['p1', 'p2', 'p3', 'p4']):
    print(f"Class {v}:")
    print(f"Precision: {precision[i]}")
    print(f"Recall: {recall[i]}")
    print(f"F1 Score: {f1_score[i]}")
    print()


Accuracy: 1.00

Class p1:
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Class p2:
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Class p3:
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Class p4:
Precision: 1.0
Recall: 1.0
F1 Score: 1.0



* Try 2-3 models and record the accuracy and other metrics
* Once the metrics are observed, see which one is better
* Then do hyper-parameter on that particular model

#### Recall 
* There are 2 classes red and blue
* Recall for red class will be out of total points of that class how many were predicted as red class

#### Precision
* There are 2 classes red and blue
* Out of all predicted as red class, how many are red class

In [88]:
from sklearn.model_selection import GridSearchCV
x_train, x_test, y_train, y_test = train_test_split(x, y_encoded, test_size=0.2, random_state=42)

# Define the XGBClassifier with the initial set of hyperparameters
xgb_model = xgb.XGBClassifier(objective='multi:softmax', num_class=4)

# Define the parameter grid for hyperparameter tuning

param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
}

grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=3, scoring='accuracy', n_jobs=-1)
grid_search.fit(x_train, y_train)

# Print the best hyperparameters
print("Best Hyperparameters:", grid_search.best_params_)

# Evaluate the model with the best hyperparameters on the test set
best_model = grid_search.best_estimator_
accuracy = best_model.score(x_test, y_test)
print("Test Accuracy:", accuracy)

Best Hyperparameters: {'learning_rate': 0.01, 'max_depth': 5, 'n_estimators': 50}
Test Accuracy: 1.0


In [84]:
import pickle
filename = 'credit_risk.sav'
pickle.dump(xgb_classifier, open(filename, 'wb'))

In [106]:
load_model = pickle.load(open(filename, 'rb'))

# Explaining to Business --> Interpretation of the outcomes

* It depends on Risk appetite
* If Risk appetite is low --> they are not willing to take much risk --> we can say target only P1 customers
* If Risk appetite is high --> they are willing to take risk --> we can say target P1, P2 and maybe P3

# Feedback loop

* If we receive feedback as 1 - P3 customer is not actually P3, he/she can be labelled as P2 or P1
* Based on this we will change the label in our dataset 

# Correlation v Causation
* It is not necessary that if two variable are correlated, then one variable is caused by other.
* Ice cream sales increase v shark attacks increase --> this is correlated but this caused because of some other factor (warm temp)