# Import Package

Import the two Python libraries used in this notebook: `pandas` and `geopy`.

In [3]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.distance import geodesic

# Import 'orders.csv' and set the chunksize

### Parameters

- **chunksize**: 
  The size of the batch to read from the CSV file at a time (default is set to 100).

- **chunk**: 
  A chunk of the orders DataFrame.

- **lists**: 
  List of elements to count.

- **locations**: 
  List of location names.

- **source_coords**: 
  List of tuples containing source coordinates (latitude, longitude).

- **destination_coords**: 
  List of tuples containing destination coordinates (latitude, longitude).

- **payment_amounts**: 
  List of customer payment amounts.

- **weights**: 
  List of item weights.

- **distances**: 
  List of distances.


In [None]:
# Number of CSV rows handed back per batch.
chunksize = 100

# Because chunksize is given, read_csv returns a reader that is iterated to
# obtain one DataFrame per batch, instead of loading the file as a single
# DataFrame.
orders = pd.read_csv(
    'orders.csv',
    chunksize=chunksize,
)

# Shows the reader object itself, not the rows of the file.
print(orders)



## **1. Aggregate Customer Payment Amount**
```python
agg_functions = { 'customer_payment_amount': 'sum', }
aggspendpercustomer = chunk.groupby(chunk['customer_id']).aggregate(agg_functions)
```
- Aggregates the `customer_payment_amount` for each `customer_id` in the current chunk.
- The results are stored in `aggspendpercustomer`, which records the total payment amount for each customer.

---

## **2. Count Source Locations**
```python
lists = chunk['source']
count_source = dict()
for i in lists:
    if i in count_source:
        count_source[i] += 1
    else:
        count_source[i] = 1
```
- Iterates through the `source` column (list of source locations) in the current chunk.
- Counts the occurrences of each source location and stores the results in the `count_source` dictionary.

---

## **3. Count Destination Locations**
```python
listd = chunk['destination']
total = len(listd)
count_destination = dict()
for i in listd:
    if i in count_destination:
        count_destination[i] += 1
    else:
        count_destination[i] = 1
```
- Similar to the source location count, it counts the occurrences of each destination location in the `destination` column of the current chunk.
- `total` stores the total number of destinations.

---

## **4. Fetch Latitude and Longitude for Source Locations**
```python
loc = Nominatim(user_agent="GetLoc")
sourcelatitudes = []
sourcelongitudes = []
for location in lists:
    getLoc = loc.geocode(location)
    sourcelatitudes.append(getLoc.latitude)
    sourcelongitudes.append(getLoc.longitude)
```
- Uses the `Nominatim` geocoding service from the `geopy` library to fetch latitude and longitude for each source location in the `source` column.
- Stores the latitude and longitude values in the `sourcelatitudes` and `sourcelongitudes` lists, respectively.

---

## **5. Fetch Latitude and Longitude for Destination Locations**
```python
destinationlatitudes = []
destinationlongitudes = []
for location in listd:
    getLoc = loc.geocode(location)
    destinationlatitudes.append(getLoc.latitude)
    destinationlongitudes.append(getLoc.longitude)
```
- Similar to the source geocoding process, it fetches latitude and longitude for each destination location in the `destination` column.
- Stores the results in `destinationlatitudes` and `destinationlongitudes` lists.

---

## **6. Calculate Geodesic Distances**
```python
distances = []
for i in range(len(sourcelatitudes)):
    source_coords = (sourcelatitudes[i], sourcelongitudes[i])
    destination_coords = (destinationlatitudes[i], destinationlongitudes[i])
    distance = geodesic(source_coords, destination_coords).miles
    distances.append(distance)
```
- Calculates the geodesic distance (in miles) between each pair of source and destination coordinates using the `geodesic` function from `geopy`.
- Stores the calculated distances in the `distances` list.

---

## **7. Calculate Cost Per Mile**
```python
listw = chunk['item_weight']
listm = chunk['customer_payment_amount']
costpermiles = []
for x in range(len(listm)):
    cost = listm[x] / (listw[x] * distances[x])
    costpermiles.append(float(cost))
```
- For each order:
  - Uses the customer payment amount (`listm`), item weight (`listw`), and distance (`distances`) to calculate the cost per mile:
    \[
    \text{Cost per mile} = \frac{\text{Customer Payment Amount}}{\text{Item Weight} \times \text{Distance}}
    \]
  - Stores the calculated costs in the `costpermiles` list.

---

## **Summary**
This code:
1. Processes chunks of an orders dataset and aggregates customer payment information.
2. Counts the occurrences of source and destination locations.
3. Fetches geographic coordinates (latitude and longitude).
4. Calculates geodesic distances between source and destination locations.
5. Computes the cost per mile based on payment amounts, item weights, and distances.


In [None]:
# Results for the whole file. They are created once, before the loop, and
# extended by every chunk, so the cells that follow report on all orders
# rather than only on the last chunk that was read.
count_source = {}            # source location -> number of orders
count_destination = {}       # destination location -> number of orders
lists = []                   # source of every order, in file order
listd = []                   # destination of every order, in file order
sourcelatitudes = []
sourcelongitudes = []
destinationlatitudes = []
destinationlongitudes = []
distances = []               # geodesic miles per order
costpermiles = []            # payment / (item weight * miles) per order
spend_parts = []             # per-chunk payment sums, merged after the loop

# Marker for values that could not be computed. One shared NaN object is
# reused so that identical rows still compare equal when they are collected
# into a set later on.
unknown = float('nan')

# A single geocoder client and a cache of its answers: geocoding is a lookup
# against an external service, so each distinct place is requested only once.
loc = Nominatim(user_agent="GetLoc")
coords_by_place = {}


def _coords(place):
    """Return (latitude, longitude) for ``place``, or None if nothing matched."""
    if place not in coords_by_place:
        match = loc.geocode(place)
        # geocode() returns None when the query has no result.
        if match is None:
            coords_by_place[place] = None
        else:
            coords_by_place[place] = (match.latitude, match.longitude)
    return coords_by_place[place]


for chunk in orders:
    # Payment total per customer within this chunk.
    agg_functions = {'customer_payment_amount': 'sum'}
    spend_parts.append(chunk.groupby(chunk['customer_id']).aggregate(agg_functions))

    # Walk the rows by position. Chunks after the first keep the row labels
    # of the original file (100, 101, ...), so indexing a column with
    # 0..len-1 would raise KeyError there.
    rows = zip(
        chunk['source'],
        chunk['destination'],
        chunk['item_weight'],
        chunk['customer_payment_amount'],
    )
    for source, destination, weight, payment in rows:
        # Count occurrences of each source and destination location.
        count_source[source] = count_source.get(source, 0) + 1
        count_destination[destination] = count_destination.get(destination, 0) + 1
        lists.append(source)
        listd.append(destination)

        # Latitude and longitude of both ends of the order.
        source_coords = _coords(source)
        destination_coords = _coords(destination)
        source_lat, source_lon = source_coords or (unknown, unknown)
        destination_lat, destination_lon = destination_coords or (unknown, unknown)
        sourcelatitudes.append(source_lat)
        sourcelongitudes.append(source_lon)
        destinationlatitudes.append(destination_lat)
        destinationlongitudes.append(destination_lon)

        # Distance in miles; unknown when either place could not be located.
        if source_coords is None or destination_coords is None:
            distance = unknown
        else:
            distance = geodesic(source_coords, destination_coords).miles
        distances.append(distance)

        # Cost per mile; unknown when the distance is unknown or when the
        # divisor (weight * miles) is zero.
        if distance is unknown or weight * distance == 0:
            cost = unknown
        else:
            cost = float(payment / (weight * distance))
        costpermiles.append(cost)

# Merge the per-chunk sums: a customer whose orders span several chunks has
# one partial row per chunk, so the partial sums are added up per customer.
if spend_parts:
    aggspendpercustomer = pd.concat(spend_parts).groupby(level=0).sum()
else:
    aggspendpercustomer = pd.DataFrame(columns=['customer_payment_amount'])

# Total number of orders (both names are kept for the cells that use them).
total = len(listd)
totalorder = len(lists)

# Print the output: per-customer spend, "Total number of orders", "The distribution of source" and "The distribution of destination"

In [None]:
# Report the aggregates produced by the processing loop.
print(aggspendpercustomer)
print("Total number of orders is", totalorder)

# Each label is printed on its own line, followed by its counts.
for label, counts in (
    ("The distribution of source is:", count_source),
    ("The distribution of destination is:", count_destination),
):
    print(label, counts, sep='\n')

# Uncomment to inspect the intermediate values:
# print(sourcelatitudes)
# print(sourcelongitudes)
# print(destinationlatitudes)
# print(destinationlongitudes)
# print(distances)

# **8. Print Unique Source-Destination-Cost Entries**


In [None]:
# Print every distinct (source, destination, cost) combination once, in the
# order in which it is first encountered.
setprint = set()

# zip pairs the three sequences by position. Indexing `lists[i]` instead would
# look rows up by label, which fails for a pandas Series whose index does not
# start at 0.
for item in zip(lists, listd, costpermiles):
    if item in setprint:
        continue
    source, destination, cost = item
    print(f"source: {source}")
    print(f"destination: {destination}")
    print(f"cost_per_miles: {cost:.2f}\n")
    setprint.add(item)
