## Sorting and tagging customers

## Sorting

In [35]:
def my_sort(xs,f=lambda x:x):
    n = len(xs)
    for i in range(n - 1):
        for j in range(0, n-i-1):
            if f(xs[j]) < f(xs[j+1]):
                xs[j],xs[j+1] =xs[j+1],xs[j]
                
    return xs             

In [76]:
xs = [1000,20000.35,3.6,40.6,50,6000,70]
my_sort(xs)

[20000.35, 6000, 1000, 70, 50, 40.6, 3.6]

In [40]:
def my_sort_dict(xs, f = lambda x:x[1]):
    xs = [(k,v) for k,v in xs.items()]
    return {k: v for (k,v) in my_sort(xs,f)}
    

In [66]:
test_dict = {"customer1":9000, "customer2":5000}
test_dict


{'customer1': 9000, 'customer2': 5000}

In [67]:
test_dict.items()

dict_items([('customer1', 9000), ('customer2', 5000)])

In [68]:
my_sort_dict(test_dict)

{'customer1': 9000, 'customer2': 5000}

## Applying to our dataset

In [5]:
import pandas as pd
import sqlite3

In [6]:
conn = sqlite3.connect("churn.db")

In [7]:
LTV= pd.read_sql("select CustomerID,TotalCharges from churn_all",conn)

In [8]:
LTV.TotalCharges

0        108.15
1        151.65
2         820.5
3       3046.05
4        5036.3
         ...   
7038     1419.4
7039     1990.5
7040     7362.9
7041     346.45
7042     6844.5
Name: TotalCharges, Length: 7043, dtype: object

In [9]:
LTV["TotalCharges"] = pd.to_numeric(LTV["TotalCharges"],errors="coerce")

In [10]:
LTV

Unnamed: 0,CustomerID,TotalCharges
0,3668-QPYBK,108.15
1,9237-HQITU,151.65
2,9305-CDSKC,820.50
3,7892-POOKP,3046.05
4,0280-XJGEX,5036.30
...,...,...
7038,2569-WGERO,1419.40
7039,6840-RESVB,1990.50
7040,2234-XADUH,7362.90
7041,4801-JZAZL,346.45


In [11]:
LTV = LTV.set_index("CustomerID").T.to_dict("records")[0]

In [12]:
LTV

{'3668-QPYBK': 108.15,
 '9237-HQITU': 151.65,
 '9305-CDSKC': 820.5,
 '7892-POOKP': 3046.05,
 '0280-XJGEX': 5036.3,
 '4190-MFLUW': 528.35,
 '8779-QRDMV': 39.65,
 '1066-JKSGK': 20.15,
 '6467-CHFZW': 4749.15,
 '8665-UTDHZ': 30.2,
 '8773-HHUOZ': 1093.1,
 '6047-YHPVI': 316.9,
 '5380-WJKOV': 3549.25,
 '8168-UQWWF': 1105.4,
 '7760-OYPDY': 144.15,
 '9420-LOJKX': 1426.4,
 '7495-OOKFY': 633.3,
 '1658-BYGOY': 1752.55,
 '5698-BQJOH': 857.25,
 '5919-TMRGD': 79.35,
 '9191-MYQKX': 496.9,
 '8637-XJIVR': 927.35,
 '0278-YXOOG': 113.85,
 '4598-XLKNJ': 2514.5,
 '3192-NQECA': 7611.85,
 '0486-HECZI': 5238.9,
 '4846-WHAFZ': 2868.15,
 '5299-RULOA': 1064.65,
 '0404-SWRVG': 229.55,
 '4412-YLTKF': 2135.5,
 '6207-WIOLX': 1502.65,
 '3091-FYHKI': 35.45,
 '2372-HWUHI': 81.25,
 '0390-DCFDQ': 70.45,
 '4080-OGPJL': 563.65,
 '2135-RXIHG': 45.65,
 '3874-EQOEP': 655.5,
 '0867-MKZVY': 1592.35,
 '3376-BMGFE': 273.0,
 '3445-HXXGF': 2651.2,
 '1875-QIVME': 242.8,
 '0691-JVSYA': 5000.2,
 '2656-FMOKZ': 1145.7,
 '2070-FNEXE': 503

In [13]:
my_sort_dict(LTV)

NameError: name 'my_sort_dict' is not defined

## Tagging customers

In [1]:
def create_seg(df,LTV_col,target_col,LTV_value,upper_seg = "High",lower_seg ="Low"):
    df[target_col]= df[LTV_col].map(lambda x:upper_seg if x > LTV_value else lower_seg)
    return df

In [14]:
df = pd.read_sql("select * from churn_all",conn)

In [15]:
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"], errors="coerce")

In [16]:
LTV_value = df.TotalCharges.quantile(0.80)

In [51]:
churn_tagged = create_seg(df,"TotalCharges","LTV_segment",LTV_value)

In [52]:
churn_tagged.LTV_segment.value_counts()

Low     5636
High    1407
Name: LTV_segment, dtype: int64

## Creating the LTV_analysis class

In [24]:
class LTV_analysis:
    
    def __init__(self):
        pass
    
    def my_sort(self,xs,f=lambda x:x):
        n = len(xs)
        for i in range(n - 1):
            for j in range(0, n-i-1):
                if f(xs[j]) < f(xs[j+1]):
                    xs[j],xs[j+1] =xs[j+1],xs[j]

        return xs 
   
    def my_sort_dict(self,xs, f = lambda x:x[1]):
        xs = [(k,v) for k,v in xs.items()]
        return {k: v for (k,v) in my_sort(xs,f)}
    
    def create_seg(self,df,LTV_col,target_col,LTV_value,upper_seg = "High",lower_seg ="Low"):
        self.df[target_col]= self.df[LTV_col].map(lambda x:upper_seg if x > LTV_value else lower_seg)
        return df
    
   
    
    

In [25]:
l = LTV_analysis()

In [26]:
l.my_sort_dict(LTV)

NameError: name 'my_sort' is not defined

In [27]:
#l.create_seg(df,"TotalCharges","LTV_segment",LTV_value)

AttributeError: 'LTV_analysis' object has no attribute 'df'