In [1]:
%matplotlib widget
import pandas as pd
from pathlib import Path
from pandas.api.types import CategoricalDtype
import matplotlib.pyplot as plt
import geopandas

from utils import annotate_plot

In [2]:
response_file = Path("responses.csv")
data = pd.read_csv(
    response_file, skiprows=[0, 1], header=None, index_col=0, dtype=object
)
drop_rows = [
    "3pcu3xeho94or13fmgo3pcu3vu7ufm3r",
    "gbeoi35j09sxzk2295regbeoi35w9xsn",
    "h3s656mrygc53ln0hik2h3s65acyqwhx",
]
data = data.drop(drop_rows)

In [None]:
operating_systems = [
    "macOS",
    "BSD",
    "Windows 7",
    "Windows 10",
    "Debian or its derivatives (Ubuntu, Mint, etc.)",
    "Red Hat or its derivatives (Fedora, CentOS, etc.)",
    "Other",
]
interfaces = ["Python", "MATLAB", "C++", "C", "Fortran"]
install_methods = [
    "I don't update",
    "Somebody else manages it for me",
    "Anaconda",
    "Build from source (via GitHub)",
    "Windows binaries (.msi file), downloaded from GitHub",
    "macOS installer (.pkg file), downloaded from GitHub",
    "Ubuntu Personal Package Archive (PPA)",
    "Other",
]
frequency = CategoricalDtype(["Often", "Sometimes", "Rarely", "Never"])


In [None]:
interface_data = data[range(8, 13)]
interface_data.columns = interfaces
n_responses = len(interface_data.index)
fig, ax = plt.subplots()
(interface_data.count().sort_index() / n_responses).plot.barh(ax=ax)
annotate_plot(
    ax,
    f"Which interface(s) do you use to access Cantera? {n_responses} Responses",
    n_responses,
)

In [None]:
fig, ax = plt.subplots()
os_data = data[range(1, 8)]
os_data.columns = operating_systems
n_responses = len(os_data.index)
(os_data.count().sort_index() / n_responses).plot.barh(ax=ax)
annotate_plot(
    ax, f"What operating system(s) do you use? {n_responses} Responses", n_responses
)

In [None]:
fig, ax = plt.subplots()
install_data = data[range(13, 21)]
install_data.columns = install_methods
n_responses = len(install_data.index)
(install_data.count().sort_index() / n_responses).plot.barh(ax=ax, fontsize=5)
annotate_plot(
    ax,
    "Do you use the following sources to install and/or upgrade Cantera?",
    n_responses,
)

In [None]:
fig, ax = plt.subplots()
ct_versions = CategoricalDtype(
    [
        "2.4 (released Aug. 2018)",
        "2.3 (released Jan. 2017)",
        "2.2 (released Jan. 2016)",
        "2.1 (released Apr. 2015)",
        "Older than 2.1",
        "Development version / master branch",
        "I’m not sure",
    ]
)
ct_version_data = data[21].dropna().astype(ct_versions)
n_responses = len(ct_version_data.index)
(ct_version_data.value_counts().sort_index() / n_responses).plot.barh(ax=ax)
annotate_plot(ax, "What version of Cantera do you use most often?", n_responses)

In [None]:
fig, ax = plt.subplots()
python_freq_data = data[22].fillna("Never").astype(frequency)
n_responses = len(python_freq_data.index)
(python_freq_data.value_counts().sort_index() / n_responses).plot.barh(ax=ax)
annotate_plot(ax, "How often do you use the Python interface?", n_responses)

In [None]:
fig, ax = plt.subplots()
python_versions = CategoricalDtype(["None"] + [f"3.{x}" for x in [5, 6, 7, 8]])
python_ver_data = data[23].fillna("None").astype(python_versions)
n_responses = len(python_ver_data.index)
(python_ver_data.value_counts().sort_index() / n_responses).plot.barh(ax=ax)
annotate_plot(ax, "Which version of Python do you use the most?", n_responses)

In [None]:
fig, ax = plt.subplots()
matlab_freq_data = data[24].fillna("Never").astype(frequency)
n_responses = len(matlab_freq_data.index)
(matlab_freq_data.value_counts().sort_index() / n_responses).plot.barh(ax=ax)
annotate_plot(ax, "How often do you use the MATLAB interface?", n_responses)

In [None]:
fig, ax = plt.subplots()
py_mat_freq_data = data[[22, 24]].fillna("Never").astype(frequency)
n_responses = len(py_mat_freq_data.index)
(py_mat_freq_data.apply(pd.Series.value_counts).sort_index() / n_responses).plot.barh(ax=ax)
ax.legend(["Python", "MATLAB"])
annotate_plot(
    ax, f"How often do you use the X interface? {n_responses} Responses", n_responses
)

In [None]:
fig, ax = plt.subplots()
continents = pd.CategoricalDtype(
    [
        "Africa",
        "Antarctica",
        "Asia",
        "Australia",
        "Europe",
        "North America",
        "South America",
    ]
)
location = data[105].astype(continents)
n_responses = len(location.index)
(location.value_counts().sort_index() / n_responses).plot.barh(ax=ax)
annotate_plot(ax, f"Where do you live? {n_responses} Responses", n_responses)

In [None]:
fig, ax = plt.subplots()
support = data[98].astype("category")
n_responses = len(support.index)
(support.value_counts().sort_index() / n_responses).plot.barh(ax=ax)
annotate_plot(
    ax,
    f"When you need help with Cantera, what is your_ first_ step to get support? {n_responses} Responses",
    n_responses,
)

In [None]:
fig, ax = plt.subplots()
donation = data[97].astype("category")
n_responses = len(donation.index)
donation.value_counts(normalize=True).sort_index().plot.barh(ax=ax)
annotate_plot(
    ax,
    f"Have you ever made a tax-deductible donation to Cantera through NumFOCUS? {n_responses} Responses",
    n_responses,
)

In [None]:
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

In [None]:
fig, ax = plt.subplots()
ax.set_aspect("equal")
world.plot(ax=ax, color='white', edgecolor='black');

In [None]:
users = [
    {"name": "Louisiana State University", "Latitude":30.4132579 , "Longitude": -91.182191},
    {"name": "Oak Ridge National Laboratory", "Latitude": 35.9411409, "Longitude": -84.3275049},
    {"name": "California State University, Los Angeles", "Latitude": 34.0636856, "Longitude": -118.1685726},
    {"name": "Middle East Technical University", "Latitude": 39.8898382, "Longitude": 32.7778973},
    {"name": "Siemens", "Latitude": 41.4249373, "Longitude": -73.2276351},
    {"name": "Streamline Numerics, Inc.",},
    {"name": "ArcelorMittal",},
    {"name": "Zhejiang University",},
    {"name": "NIST",},
    {"name": "MiT",},
    {"name": "Georgia Institute of Technoloy",},
    {"name": "Shell",},
    {"name": "Northeastern University",},
    {"name": "Colorado School of Mines",},
    {"name": "University of Connecticut",},
    {"name": "University of Vaasa",},
    {"name": "Innovative Scientific Solutions Inc.",},
    {"name": "Purdue University",},
    {"name": "KIT",},
    {"name": "Warsaw university of technology",},
    {"name": "Oregon State University",},
    {"name": "LBS",},
    {"name": "Linde",},
    {"name": "National University of Ireland, Galway",},
    {"name": "Cascade Technologies",},
    {"name": "University of Canterbury",},
    {"name": "Argonne National Laboratory",},
    {"name": "Aalborg University",},
    {"name": "Hunan University, China",},
    {"name": "BASF",},
    {"name": "Trinity College Dublin",},
    {"name": "University of California, Los Angeles",},
    {"name": "Federal University of Pampa (UNIPAMPA)",},
    {"name": "Columbia University",},
    {"name": "University of Caen",},
    {"name": "Loughborough University",},
]

users = pd.DataFrame(users)

In [None]:
gdf = geopandas.GeoDataFrame(
    users, geometry=geopandas.points_from_xy(users.Longitude, users.Latitude))

In [None]:
gdf.plot(ax=ax, color='b');

In [None]:
fig, ax = plt.subplots()
employment = data[107].astype("category").dropna()
n_responses = len(employment.index)
employment.value_counts(normalize=True).sort_index().plot.barh(ax=ax)
annotate_plot(
    ax,
    f"What is your employment status? {n_responses} Responses",
    n_responses,
)
fig.tight_layout()

In [3]:
# fig, ax = plt.subplots()
priority_responses = CategoricalDtype([
    "New thermodynamics, kinetics, or transport models",
    "Equilibrium solvers - performance",
    "Equilibrium solvers - robustness",
    "Reactor networks - performance",
    "Reactor networks - robustness",
    "1D solver - performance",
    "1D solver - robustness",
    "Feature and model documentation",
    "Examples and tutorials",
    "Python interface",
    "Matlab interface",
    "Fortran interface",
    "New language interfaces (for example, Julia)",
    "New reactor or flow types (for example, transient 1-D or plug flow)",
    "Improvement of ReactionPathDiagram",
    "Updated and Comprehensive Documentation for FORTRAN interface",
    "Adjoint Sensitivity Analysis",
])
priorities = data[range(83, 89)]
priorities.head()

Unnamed: 0_level_0,83,84,85,86,87,88
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
r2w0retelulfeavbz8nkr2w04hf2b6nx,Python interface,,Reactor networks - robustness,,Examples and tutorials,
twg8u7g64abr2oatwg8ucv1b7s986kai,Examples and tutorials,,"New reactor or flow types (for example, transi...",,1D solver - robustness,
tka9zbm6vm95yi8htka9z4pw4f40lsi4,Reactor networks - robustness,,"New thermodynamics, kinetics, or transport models",,Equilibrium solvers - robustness,
1ye8f3owozejbsniw71ye8fh0bss35gw,Examples and tutorials,,Feature and model documentation,,"New reactor or flow types (for example, transi...",
wm9wegazlc9lec5q2rp9wm9weqhfph4m,"New reactor or flow types (for example, transi...",,Reactor networks - robustness,,Reactor networks - performance,


In [None]:
highest = pd.merge(priorities[83].value_counts(), priorities[84].value_counts(), left_index=True, right_index=True)

In [None]:
highest.dtypes

In [None]:
middle = pd.DataFrame(pd.concat((priorities[85].value_counts(), priorities[86].value_counts())), columns=["Middle"])

In [None]:
middle.dtypes

In [None]:
priorities["Highest"] = priorities[83] + priorities[84]

In [None]:
priorities["Highest"] = priorities[[83, 84]].values[~np.isnan(priorities[[83, 84]].values)]

In [None]:
import numpy as np

In [None]:
priorities.loc[pd.isna(priorities[83]), 83] = priorities.loc[~pd.isna(priorities[84]), 84]

In [None]:
priorities[83]

In [None]:
priorities[84]

In [5]:
mask = pd.isna(priorities[83])

In [13]:
priorities["Highest"] = priorities[83].fillna(priorities[84])
priorities["Middle"] = priorities[85].fillna(priorities[86])
priorities["Lowest"] = priorities[87].fillna(priorities[88])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  priorities["Highest"] = priorities[83].fillna(priorities.loc[:, 84])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  priorities["Middle"] = priorities[85].fillna(priorities.loc[:, 86])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  priorities["Lowest"] = priorities[87].fillna(priorities.loc[:, 88])


In [11]:
priorities.loc[:, ("Highest", "Middle", "Lowest")]

Unnamed: 0_level_0,Highest,Middle,Lowest
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
r2w0retelulfeavbz8nkr2w04hf2b6nx,Python interface,Reactor networks - robustness,Examples and tutorials
twg8u7g64abr2oatwg8ucv1b7s986kai,Examples and tutorials,"New reactor or flow types (for example, transi...",1D solver - robustness
tka9zbm6vm95yi8htka9z4pw4f40lsi4,Reactor networks - robustness,"New thermodynamics, kinetics, or transport models",Equilibrium solvers - robustness
1ye8f3owozejbsniw71ye8fh0bss35gw,Examples and tutorials,Feature and model documentation,"New reactor or flow types (for example, transi..."
wm9wegazlc9lec5q2rp9wm9weqhfph4m,"New reactor or flow types (for example, transi...",Reactor networks - robustness,Reactor networks - performance
qec8q0jidspzgbqec8y2l54mpbhndmv9,1D solver - robustness,"New thermodynamics, kinetics, or transport models",Python interface
dqpb6tpqd8wyk4t8vdqpb6b4bmrxbw3s,Reactor networks - robustness,"New reactor or flow types (for example, transi...","New thermodynamics, kinetics, or transport models"
27ca0l3byvqo1o1u6ps627cmdxl0b8b3,Reactor networks - performance,1D solver - performance,"New reactor or flow types (for example, transi..."
g25e7r3qhr3q9wlsyg25e7zg2mkn3nvj,1D solver - performance,Reactor networks - robustness,1D solver - robustness
84h4yz86is650mt6rvx84h4yzttmijlq,Equilibrium solvers - robustness,1D solver - robustness,"New thermodynamics, kinetics, or transport models"


In [20]:
ranking = pd.concat([priorities[i].fillna(priorities[i + 1]) for i in (83, 85, 87)], axis=1).astype(priority_responses)
ranking.columns = ["Highest", "Middle", "Lowest"]

Highest    category
Middle     category
Lowest     category
dtype: object

In [30]:
fig, ax = plt.subplots(figsize=(9, 6))
ranking.apply(pd.Series.value_counts).plot.barh(ax=ax)
fig.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …