# KIVA

Kiva.org is an online crowdfunding platform that extends financial services to poor and financially excluded people around the world. Kiva lenders have provided over $1 billion in loans to over 2 million people. To set investment priorities, help inform lenders, and understand their target communities, knowing the poverty level of each borrower is critical. However, this level must be inferred from a limited set of information about each borrower.


## Loading Libraries

In [None]:
import pandas as pd 
import numpy as np
from matplotlib import pyplot as plt 
import seaborn as sns
import folium
import plotly.express as px

## Importing Data

In [None]:
# Read the four Kiva crowdfunding CSV files; the paths are listed in the same
# order as the target names they are unpacked into.
(
    kiva_loans,
    kiva_mpi_region_location,
    loan_theme_ids,
    loan_themes_by_region,
) = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/data-science-for-good-kiva-crowdfunding/kiva_loans.csv",
        "../input/data-science-for-good-kiva-crowdfunding/kiva_mpi_region_locations.csv",
        "../input/data-science-for-good-kiva-crowdfunding/loan_theme_ids.csv",
        "../input/data-science-for-good-kiva-crowdfunding/loan_themes_by_region.csv",
    )
]

In [None]:
# Parse the "date" column (year-month-day strings) into datetimes, then derive
# calendar features from it as new columns.
kiva_loans["date"] = pd.to_datetime(kiva_loans["date"], format="%Y-%m-%d")
loan_dates = kiva_loans["date"].dt
kiva_loans["year"] = loan_dates.year
kiva_loans["month"] = loan_dates.month_name()
kiva_loans["day_week"] = loan_dates.day_name()
# Numeric month, kept alongside the month name so rows can be sorted
# chronologically.
kiva_loans["number_month"] = loan_dates.month
# Last expression of the cell: shown as the cell output.
kiva_loans

In [None]:
# Show the MPI region locations table as the cell output.
kiva_mpi_region_location

In [None]:
# Show the loan theme ids table as the cell output.
loan_theme_ids

In [None]:
# Show the loan themes by region table as the cell output.
loan_themes_by_region

## Check for missing data

In [None]:
# Print the column dtypes and non-null counts of kiva_loans; a non-null count
# below the row count marks a column with missing values.
kiva_loans.info()

In [None]:
# Print the column dtypes and non-null counts of kiva_mpi_region_location to
# spot columns with missing values.
kiva_mpi_region_location.info()

In [None]:
# Print the column dtypes and non-null counts of loan_theme_ids to spot
# columns with missing values.
loan_theme_ids.info()

In [None]:
# Print the column dtypes and non-null counts of loan_themes_by_region to spot
# columns with missing values.
loan_themes_by_region.info()

## Data exploration

### Number of loans by currency

- PHP: Philippine peso
- USD: United States dollar
- KES: Kenyan shilling
- KHR: Cambodian riel


In [None]:
# Count loan ids per currency and keep the 20 currencies with the most loans.
loans_per_currency = kiva_loans.groupby(["currency"]).id.count().reset_index()
currency_loans = loans_per_currency.sort_values(by="id", ascending=False).head(20)

plt.figure(figsize=(15, 10))
chart = sns.barplot(data=currency_loans, x="currency", y="id")
plt.xticks(rotation=90)

# Write each bar's count centred horizontally, 7 points above its top edge.
for bar in chart.patches:
    bar_top = bar.get_height()
    chart.annotate(
        f"{bar_top:.0f}",
        xy=(bar.get_x() + bar.get_width() / 2.0, bar_top),
        xytext=(0, 7),
        textcoords="offset points",
        ha="center",
        va="center",
        fontsize=12,
        color="black",
    )

plt.title("Number of loans by currency between 2014 and 2017", fontsize=20)
plt.xlabel("Currency")
plt.ylabel("Number of loans", fontsize=10)
plt.show()


### Number of loans per year

In [None]:
# Count loans (non-null dates) per year.
loans_years = kiva_loans.groupby("year").date.count().reset_index()
years = loans_years["year"]
yearly_counts = loans_years["date"]

fig, ax = plt.subplots()
ax.plot(years, yearly_counts, marker="o")

# Label every point with its count, 5 points above and to the left of it.
for year, n_loans in zip(years, yearly_counts):
    ax.annotate(
        str(n_loans),
        xy=(year, n_loans),
        xytext=(-5, 5),
        textcoords="offset points",
        ha="right",
    )

# One tick per year present in the data.
ax.set_xticks(years.tolist())
ax.grid(True)
ax.set_xlabel("Year")
ax.set_ylabel("Number of loans")
ax.set_title("Number of loans by Year".title())
plt.show()

In [None]:
# Count rows (non-null country values) per sector, largest sector first.
sector_counts = kiva_loans.groupby("sector").country.count()
sector_pie = sector_counts.reset_index().sort_values(by="country", ascending=False)

plt.figure(figsize=(10, 8))
chart = sns.barplot(data=sector_pie, x="sector", y="country")

# Write each bar's count centred horizontally, 7 points above its top edge.
for bar in chart.patches:
    bar_top = bar.get_height()
    chart.annotate(
        f"{bar_top:.0f}",
        xy=(bar.get_x() + bar.get_width() / 2.0, bar_top),
        xytext=(0, 7),
        textcoords="offset points",
        ha="center",
        va="center",
        fontsize=12,
        color="black",
    )

plt.xticks(rotation=90)
plt.title("Number of loans by sector".title(), fontsize=18)
plt.xlabel("Sector", fontsize=12)
plt.ylabel("number of loans".title())
plt.show()

### Number of loans in different years

In [None]:
# Count loans (non-null dates) per calendar month of each year. The numeric
# month is part of the grouping only so the rows can be ordered
# chronologically; the month name is what gets plotted.
month_year_loans = (
    kiva_loans.groupby(["year", "number_month", "month"])
    .date.count()
    .reset_index()
    .sort_values(by=["year", "number_month"])
)

plt.figure(figsize=(20, 10))
# sort=False keeps the chronological row order established above. With the
# default sort=True seaborn re-sorts the rows by the month *name*, so the
# x axis would come out alphabetical (April, August, ...) instead of
# following the calendar.
chart = sns.lineplot(
    data=month_year_loans,
    x="month",
    y="date",
    hue="year",
    palette='jet',
    linewidth=2.5,
    sort=False,
)
plt.xticks(rotation=30)
plt.grid(True)
plt.title("numbers of loans per year".title(), fontsize=20)
plt.ylabel("numbers of loans".title())
plt.xlabel("months".title())
plt.show()

### Distribution of repayment

In [None]:
# Count loans (non-null dates) for each repayment interval.
repayment = kiva_loans.groupby(["repayment_interval"]).date.count().reset_index()

plt.figure(figsize=(8, 6))
# Each wedge is labelled with its interval name and its share of all loans.
plt.pie(repayment.date, labels=repayment.repayment_interval, autopct="%0.2f%%")
# Equal axis scaling so the pie is drawn as a circle.
plt.axis("equal")
plt.title("Distribution of listed repayment_interval".title(), fontsize=15)
plt.legend()
plt.show()


### Loans by activity in three different Sectors

In [None]:
def _plot_top_activities(position, sector, title, top_n=10):
    """Draw a horizontal bar chart of the most common activities in a sector.

    Args:
        position: ``(nrows, ncols, index)`` tuple forwarded to ``plt.subplot``.
        sector: Value of the ``sector`` column used to filter ``kiva_loans``.
        title: Title shown above the subplot.
        top_n: Number of activities to keep, by descending loan count.
    """
    plt.subplot(*position)
    sector_loans = kiva_loans[kiva_loans.sector == sector]
    top_activities = (
        sector_loans.groupby(["sector", "activity"])
        .date.count()
        .reset_index()
        .sort_values(by="date", ascending=False)
        .iloc[:top_n]
    )
    sns.barplot(y="activity", x='date', data=top_activities, palette='Accent')
    # Print each count at x=0.6 (data units), i.e. near the base of its bar.
    for row, n_loans in enumerate(top_activities.date):
        plt.text(.6, row + 0.1, round(n_loans, 2), fontsize=10, color='k')
    plt.title(title)
    plt.xlabel("Number of loans")
    plt.ylabel("activity".title())


plt.figure(figsize=(20, 12))

# Top row: two charts side by side.
_plot_top_activities((2, 2, 1), "Food", "Food sector".title())
_plot_top_activities((2, 2, 2), "Arts", "art sector".title())
# Bottom row: a single chart spanning the full width.
_plot_top_activities((2, 1, 2), "Agriculture", "Agriculture sector".title())
plt.show()

### Boxplot of loan amount

In [None]:
plt.figure(figsize=(16, 8))

# Order the countries by mean loan amount, largest first. The column is
# selected before aggregating so that only loan_amount is averaged; averaging
# the whole frame fails on its non-numeric columns in pandas >= 2.0.
poo = kiva_loans.groupby('country')['loan_amount'].mean().sort_values(ascending=False)
# x and y are passed by keyword because seaborn >= 0.12 no longer accepts them
# positionally. np.log10 is used so the values match the "log10" axis label
# and title (np.log is the natural logarithm).
sns.boxplot(
    x=kiva_loans['country'],
    y=np.log10(kiva_loans['loan_amount']),
    palette='spring',
    order=poo.index,
)
plt.xlabel('')
plt.ylabel('Loan amount ($log10$)')
plt.title('Boxplot of loan amount($log10$)')
# The trailing semicolon suppresses the return value in the cell output.
plt.xticks(rotation=90);

### Distribution of loans on a world map

In [None]:
# Number of loans (non-null dates) per country.
maps = kiva_loans.groupby("country").date.count().reset_index()

import plotly.offline as py

import plotly.graph_objs as go


# One choropleth trace: countries are matched by name and coloured by their
# loan count.
data = [dict(
    type='choropleth',
    locations=maps['country'],
    locationmode='country names',
    z=maps['date'],
    text=maps['country'],
    autocolorscale=False,
    marker=dict(
        line=dict(
            color='rgb(180,180,180)',
            width=0.5,
        ),
    ),
    colorbar=dict(
        # NOTE(review): `autotick` looks like a legacy colorbar option that
        # only gets through because of validate=False below -- confirm
        # whether it can be dropped.
        autotick=False,
        tickprefix='',
        title='Number of Loans',
    ),
)]

layout = dict(
    title='Counting of Loans Per Country',
    geo=dict(
        showframe=False,
        showcoastlines=True,
        projection=dict(
            # Plotly projection names are lowercase; the capitalised
            # 'Mercator' is not a recognised value.
            type='mercator',
        ),
    ),
)

figure = dict(data=data, layout=layout)
py.iplot(figure, validate=False, filename='map-countrys-count')

### Distribution of loans by sectors in Peru

In [None]:
# Per-sector count of Peruvian loans, taken as the number of non-null
# "region" values in each sector.
# NOTE(review): count() skips rows whose region is missing, so this can be
# lower than the number of loans -- confirm that is intended.
df_peru = (
    kiva_loans.loc[kiva_loans["country"] == "Peru"]
    .groupby("sector")["region"]
    .count()
    .reset_index()
)
# Show the table with cells shaded by value as the cell output.
df_peru.style.background_gradient()