In [1]:
from pyspark import SparkContext, SparkConf

## Amount Spent by Customers
### Steps followed:
1. Read the e-commerce data.
2. Parse each line of the data to return (customer id, amount spent)
3. Create an RDD to hold the parsed data in <K,V> format.
4. Find total amount spent per customer
\begin{equation*}
(custId, \sum AmountSpent)
\end{equation*}
5. Change the K,V pair to V,K and sort in increasing order of amount spent

In [2]:
def customer_amount(data_path="file:////Users/amoghmishra/Desktop/AmoghM/ApacheSpark/dataset/customer-orders.csv"):
    """Print the total amount spent by each customer, lowest spender first.

    Reads the orders file as text, parses every line with ``preprocess`` into
    ``(customer_id, amount)``, sums the amounts per customer and prints one
    ``customer_id total`` line per customer in ascending order of total.

    Args:
        data_path: URI of the orders file; each line holds
            ``customer id, item id, amount spent``. Defaults to the original
            author's local copy, so pass your own location to run elsewhere.
    """
    conf = SparkConf().setMaster("local").setAppName("Ecommerece")
    sc = SparkContext(conf=conf)
    try:
        # Customer ID, Item ID, Amount Spent
        orders = sc.textFile(data_path)
        spent_by_customer = orders.map(preprocess).reduceByKey(lambda x, y: x + y)

        # (cust_id, amt) --> (amt, cust_id) so that sortByKey orders by amount.
        # Indexing is used instead of a tuple-unpacking lambda, which is a
        # syntax error on Python 3.
        customers_by_spend = spent_by_customer.map(lambda pair: (pair[1], pair[0]))

        # sortByKey defaults to ascending order.
        result = customers_by_spend.sortByKey().collect()
    finally:
        # Always release the context: only one SparkContext may be active at a
        # time, so leaving it running makes a re-run of this cell fail.
        sc.stop()

    # The parenthesised single-argument form prints identically on Python 2
    # and Python 3.
    print("TOTAL AMOUNT SPENT BY THE CUSTOMER IN ASCENDING ORDER")
    for amt, cust_id in result:
        print("%s %s" % (cust_id, amt))


In [3]:
def preprocess(line):
    """Turn one comma-separated order record into a ``(customer_id, amount)`` pair.

    The first field is converted to ``int`` and the third field to ``float``;
    the second field and any fields after the third are ignored.

    Args:
        line: One text line of the orders file.

    Returns:
        A ``(int, float)`` tuple of customer id and amount spent.

    Raises:
        IndexError: If the line has fewer than three comma-separated fields.
        ValueError: If the first or third field is not numeric.
    """
    fields = line.split(",")
    return (int(fields[0]), float(fields[2]))

In [4]:
# Run the report when this code is executed directly (as it is when the
# notebook cell runs) rather than imported as a module.
if __name__ == "__main__":
    customer_amount()

TOTAL AMOUNT SPENT BY THE CUSTOMER IN ASCENDING ORDER
45 3309.38
79 3790.57
96 3924.23
23 4042.65
99 4172.29
75 4178.5
36 4278.05
98 4297.26
47 4316.3
77 4327.73
13 4367.62
48 4384.33
49 4394.6
94 4475.57
67 4505.79
50 4517.27
78 4524.51
5 4561.07
57 4628.4
83 4635.8
91 4642.26
74 4647.13
84 4652.94
3 4659.63
12 4664.59
66 4681.92
56 4701.02
21 4707.41
80 4727.86
14 4735.03
37 4735.2
7 4755.07
44 4756.89
31 4765.05
82 4812.49
4 4815.05
10 4819.7
88 4830.55
20 4836.86
89 4851.48
95 4876.84
38 4898.46
76 4904.21
86 4908.81
27 4915.89
18 4921.27
53 4945.3
1 4958.6
51 4975.22
16 4979.06
30 4990.72
28 5000.71
22 5019.45
29 5032.53
17 5032.68
60 5040.71
25 5057.61
19 5059.43
81 5112.71
69 5123.01
65 5140.35
11 5152.29
35 5155.42
40 5186.43
87 5206.4
52 5245.06
26 5250.4
62 5253.32
33 5254.66
24 5259.92
93 5265.75
64 5288.69
90 5290.41
55 5298.09
9 5322.65
34 5330.8
72 5337.44
70 5368.25
43 5368.83
92 5379.28
6 5397.88
15 5413.51
63 5415.15
58 5437.73
32 5496.05
61 5497.48
85 5503.43
8 5517.2