In [14]:
# Dependencies
%matplotlib notebook

In [15]:
# Import the Pandas, numpy and malplotlib library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [16]:
# Store filepath in a variable and read CSV file with the pandas library

Teens_Attempted_Suicide_DC= pd.read_csv('Teens who have Attempted Suicide in DC.csv')


In [17]:
# Print the first five rows of data to the screen
Teens_Attempted_Suicide_DC.head()

Unnamed: 0,Location,Indicator Value,Period of Measure,Breakout Category,Breakout Subcategory
0,District of Columbia,16.0,2017,Gender,Female
1,District of Columbia,16.0,2017,Gender,Male
2,District of Columbia,16.0,2017,Race/Ethnicity,Asian
3,District of Columbia,16.0,2017,Race/Ethnicity,Black or African American
4,District of Columbia,16.0,2017,Race/Ethnicity,Hispanic or Latino


In [18]:
# check total rows and columns
Teens_Attempted_Suicide_DC.shape

(35, 5)

In [19]:
# the Number of suicide attempts each year in District of Columbia.
# Problem can be seen by examining datatypes within the DataFrame
Teens_Attempted_Suicide_DC['Period of Measure'].value_counts()

2017    12
2015     7
2013     7
2011     4
2007     4
2005     1
Name: Period of Measure, dtype: int64

In [20]:
# Using colon to convert a column's data into object
Teens_Attempted_Suicide_DC= pd.read_csv('Teens who have Attempted Suicide in DC.csv', dtype={'Period of Measure':object})
Teens_Attempted_Suicide_DC.dtypes

Location                 object
Indicator Value         float64
Period of Measure        object
Breakout Category        object
Breakout Subcategory     object
dtype: object

In [21]:
# Set x axis and tick locations
x_axis = np.arange(len(Teens_Attempted_Suicide_DC))
tick_locations = [value+0.4 for value in x_axis]

In [22]:
# Create a list indicating where to write x labels and set figure size to adjust for space
plt.figure(figsize=(10,3))
plt.bar(x_axis, Teens_Attempted_Suicide_DC["Indicator Value"], color='b', alpha=0.5, align="edge")
plt.xticks(tick_locations, Teens_Attempted_Suicide_DC["Breakout Subcategory"], rotation="vertical")

<IPython.core.display.Javascript object>

([<matplotlib.axis.XTick at 0x24ef8e260b8>,
  <matplotlib.axis.XTick at 0x24ef8e1f9e8>,
  <matplotlib.axis.XTick at 0x24ef8791f60>,
  <matplotlib.axis.XTick at 0x24ef8e99b38>,
  <matplotlib.axis.XTick at 0x24ef8ea40b8>,
  <matplotlib.axis.XTick at 0x24ef8ea4518>,
  <matplotlib.axis.XTick at 0x24ef8ea49e8>,
  <matplotlib.axis.XTick at 0x24ef8ea4eb8>,
  <matplotlib.axis.XTick at 0x24ef8ead3c8>,
  <matplotlib.axis.XTick at 0x24ef8ead898>,
  <matplotlib.axis.XTick at 0x24ef8ea4f98>,
  <matplotlib.axis.XTick at 0x24ef8ea40f0>,
  <matplotlib.axis.XTick at 0x24ef8eaddd8>,
  <matplotlib.axis.XTick at 0x24ef8eb32e8>,
  <matplotlib.axis.XTick at 0x24ef8eb37b8>,
  <matplotlib.axis.XTick at 0x24ef8eb3c88>,
  <matplotlib.axis.XTick at 0x24ef8eba198>,
  <matplotlib.axis.XTick at 0x24ef8eba6a0>,
  <matplotlib.axis.XTick at 0x24ef8ebac18>,
  <matplotlib.axis.XTick at 0x24ef8eba5f8>,
  <matplotlib.axis.XTick at 0x24ef8eb3b00>,
  <matplotlib.axis.XTick at 0x24ef8eada58>,
  <matplotlib.axis.XTick at 0x24

In [23]:
# Set x and y limits
plt.xlim(-0.25, len(x_axis))
plt.ylim(0, max(Teens_Attempted_Suicide_DC["Indicator Value"])+5)

(0, 21.0)

In [24]:
# Set a Title and labels
plt.title("Teens attempted to commit suicide in DC by Gender, Race/Ethnicity, Sexual Identity (2005-2017)")
plt.xlabel("Gender, Race/Ethnicity, Sexual Identity")
plt.ylabel("Percentage of teens who have attempted to commit suicide")


Text(0, 0.5, 'Percentage of teens who have attempted to commit suicide')

In [25]:
# Save our graph and show the graph
plt.tight_layout()
plt.savefig("Images/suicide attempted by teens.png")
plt.show()

  


In [26]:
# Check to see if there are any rows with missing data
Teens_Attempted_Suicide_DC.loc[ : , "Breakout Subcategory"]

0                        Female
1                          Male
2                         Asian
3     Black or African American
4            Hispanic or Latino
5                Multiple Races
6                         White
7                      Bisexual
8                Gay or Lesbian
9     Gay, Lesbian, or Bisexual
10      Heterosexual (Straight)
11                     Not Sure
12                       Female
13                         Male
14                        Asian
15    Black or African American
16           Hispanic or Latino
17               Multiple Races
18                        White
19                       Female
20                         Male
21                        Asian
22    Black or African American
23           Hispanic or Latino
24               Multiple Races
25                        White
26                       Female
27                         Male
28                        Black
29                     Hispanic
30                       Female
31      

In [27]:
# Create a group based on the values in the 'Breakout Subcategory' and 'Breakout Category' column to check statistics
# Check to see if there are any rows with missing data
Gender_Teens_Attempted_Suicide_DC = Teens_Attempted_Suicide_DC.groupby(["Breakout Category", "Breakout Subcategory"])

Gender_Teens_Attempted_Suicide_DC.describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Indicator Value,Indicator Value,Indicator Value,Indicator Value,Indicator Value,Indicator Value,Indicator Value,Indicator Value
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
Breakout Category,Breakout Subcategory,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Gender,Female,5.0,13.16,1.732917,11.5,12.2,12.7,13.4,16.0
Gender,Male,5.0,13.16,1.732917,11.5,12.2,12.7,13.4,16.0
Race/Ethnicity,Asian,3.0,14.033333,1.738774,12.7,13.05,13.4,14.7,16.0
Race/Ethnicity,Black,2.0,11.85,0.494975,11.5,11.675,11.85,12.025,12.2
Race/Ethnicity,Black or African American,3.0,14.033333,1.738774,12.7,13.05,13.4,14.7,16.0
Race/Ethnicity,Hispanic,2.0,11.85,0.494975,11.5,11.675,11.85,12.025,12.2
Race/Ethnicity,Hispanic or Latino,3.0,14.033333,1.738774,12.7,13.05,13.4,14.7,16.0
Race/Ethnicity,Multiple Races,3.0,14.033333,1.738774,12.7,13.05,13.4,14.7,16.0
Race/Ethnicity,White,3.0,14.033333,1.738774,12.7,13.05,13.4,14.7,16.0
Sexual Identity,Bisexual,1.0,16.0,,16.0,16.0,16.0,16.0,16.0


# Line graph represented on the Teen_Attempted_suicide_DC2

In [28]:
# represented on the Teen_Attempted_suicide_DC2


# Get current axis (ax allows reuse an axis to plot multiple lines)
# ax = plt.gca() 

# Teens_Attempted_Suicide_DC.plot(kind='line', x='Period of Measure', y='Indicator Value', color='purple', ax=ax, linestyle='solid', linewidth=1.5, fontsize=8.5, figsize=(8,4))

# Adding customized grid line
# ax.grid(linestyle=':', linewidth='0.5', color='gray')

# Adding Title
# plt.title("Teens attempted to commit suicide in DC (2005-2017)", fontname="Arial", fontsize=13)

# Adding Labels
# plt.xlabel('Year', fontname="Arial", fontsize=9)
# plt.ylabel('Percentage',fontname="Arial", fontsize=9)




In [29]:
# Display the plot
# plt.show()

# Save plot as PDF
# plt.savefig('Images/Teens attempted to commit suicide in DC (2005-2017).png')